forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5bfadab
commit 2757f0e
Showing
4 changed files
with
291 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
.TH dcsnoop 8 "2016-02-10" "USER COMMANDS" | ||
.SH NAME | ||
dcsnoop \- Trace directory entry cache (dcache) lookups. Uses Linux eBPF/bcc. | ||
.SH SYNOPSIS | ||
.B dcsnoop [\-h] [\-a] | ||
.SH DESCRIPTION | ||
By default, this traces every failed dcache lookup (cache miss), and shows the | ||
process performing the lookup and the filename requested. A \-a option can be | ||
used to show all lookups, not just failed ones. | ||
|
||
The output of this tool can be verbose, and is intended for further | ||
investigations of dcache performance beyond dcstat(8), which prints | ||
per-second summaries. | ||
|
||
This uses kernel dynamic tracing of the d_lookup() function, and will need | ||
and will need updating to match any changes to this function. | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH OPTIONS | ||
.TP | ||
\-h | ||
Print usage message. | ||
.TP | ||
\-a | ||
Trace references, not just failed lookups. | ||
.SH EXAMPLES | ||
.TP | ||
Trace failed dcache lookups: | ||
# | ||
.B dcsnoop | ||
.TP | ||
Trace all dcache lookups: | ||
# | ||
.B dcsnoop \-a | ||
.SH FIELDS | ||
.TP | ||
TIME(s) | ||
Time of lookup, in seconds. | ||
.TP | ||
PID | ||
Process ID. | ||
.TP | ||
COMM | ||
Process name. | ||
.TP | ||
T | ||
Type: R == reference (only visible with \-a), M == miss. A miss will print two | ||
lines, one for the reference, and one for the miss. | ||
.TP | ||
FILE | ||
The file name component that was being looked up. This contains trailing | ||
pathname components (after '/'), which will be the subject of subsequent | ||
lookups. | ||
.SH OVERHEAD | ||
File name lookups can be frequent (depending on the workload), and this tool | ||
prints a line for each failed lookup, and with \-a, each reference as well. The | ||
output may be verbose, and the incurred overhead, while optimized to some | ||
extent, may still be from noticeable to significant. This is only really | ||
intended for deeper investigations beyond dcstat(8), when absolutely necessary. | ||
Measure and quantify the overhead in a test environment before use. | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Brendan Gregg | ||
.SH SEE ALSO | ||
dcstat(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
# | ||
# dcsnoop Trace directory entry cache (dcache) lookups. | ||
# For Linux, uses BCC, eBPF. Embedded C. | ||
# | ||
# USAGE: dcsnoop [-h] [-a] | ||
# | ||
# By default, this traces every failed dcache lookup, and shows the process | ||
# performing the lookup and the filename requested. A -a option can be used | ||
# to show all lookups, not just failed ones. | ||
# | ||
# This uses kernel dynamic tracing of the d_lookup() function, and will need | ||
# to be modified to match kernel changes. | ||
# | ||
# Also see dcstat(8), for per-second summaries. | ||
# | ||
# Copyright 2016 Netflix, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 09-Feb-2016 Brendan Gregg Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF | ||
import argparse | ||
import re | ||
|
||
# arguments | ||
examples = """examples: | ||
./dcsnoop # trace failed dcache lookups | ||
./dcsnoop -a # trace all dcache lookups | ||
""" | ||
parser = argparse.ArgumentParser( | ||
description="Trace directory entry cache (dcache) lookups", | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
epilog=examples) | ||
parser.add_argument("-a", "--all", action="store_true", | ||
help="trace all lookups (default is fails only)") | ||
args = parser.parse_args() | ||
|
||
# define BPF program | ||
bpf_text = """ | ||
#include <uapi/linux/ptrace.h> | ||
#include <linux/fs.h> | ||
#include <linux/sched.h> | ||
#define MAX_FILE_LEN 64 | ||
struct entry_t { | ||
char name[MAX_FILE_LEN]; | ||
}; | ||
BPF_HASH(entrybypid, u32, struct entry_t); | ||
/* from fs/namei.c: */ | ||
struct nameidata { | ||
struct path path; | ||
struct qstr last; | ||
// [...] | ||
}; | ||
int trace_fast(struct pt_regs *ctx, struct nameidata *nd, struct path *path) | ||
{ | ||
bpf_trace_printk("R %s\\n", nd->last.name); | ||
return 1; | ||
} | ||
int kprobe__d_lookup(struct pt_regs *ctx, const struct dentry *parent, | ||
const struct qstr *name) | ||
{ | ||
u32 pid = bpf_get_current_pid_tgid(); | ||
struct entry_t entry = {}; | ||
if (name->name) { | ||
bpf_probe_read(&entry.name, sizeof(entry.name), (void *)name->name); | ||
} | ||
entrybypid.update(&pid, &entry); | ||
return 0; | ||
} | ||
int kretprobe__d_lookup(struct pt_regs *ctx) | ||
{ | ||
u32 pid = bpf_get_current_pid_tgid(); | ||
struct entry_t *ep; | ||
ep = entrybypid.lookup(&pid); | ||
if (ep == 0) { | ||
return 0; // missed entry | ||
} | ||
if (ctx->ax == 0) { | ||
bpf_trace_printk("M %s\\n", ep->name); | ||
} | ||
entrybypid.delete(&pid); | ||
return 0; | ||
} | ||
""" | ||
|
||
# initialize BPF | ||
b = BPF(text=bpf_text) | ||
if args.all: | ||
b.attach_kprobe(event="lookup_fast", fn_name="trace_fast") | ||
|
||
# header | ||
print("%-11s %-6s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE")) | ||
|
||
start_ts = 0 | ||
|
||
# format output | ||
while 1: | ||
(task, pid, cpu, flags, ts, msg) = b.trace_fields() | ||
try: | ||
(type, file) = msg.split(" ", 1) | ||
except ValueError: | ||
continue | ||
|
||
if start_ts == 0: | ||
start_ts = ts | ||
|
||
print("%-11.6f %-6s %-16s %1s %s" % (ts - start_ts, pid, task, type, file)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
Demonstrations of dcsnoop, the Linux eBPF/bcc version. | ||
|
||
|
||
dcsnoop traces directory entry cache (dcache) lookups, and can be used for | ||
further investigation beyond dcstat(8). The output is likely verbose, as | ||
dcache lookups are likely frequent. By default, only failed lookups are shown. | ||
For example: | ||
|
||
# ./dcsnoop.py | ||
TIME(s) PID COMM T FILE | ||
0.002837 1643 snmpd M net/dev | ||
0.002852 1643 snmpd M 1643 | ||
0.002856 1643 snmpd M net | ||
0.002863 1643 snmpd M dev | ||
0.002952 1643 snmpd M net/if_inet6 | ||
0.002964 1643 snmpd M if_inet6 | ||
0.003180 1643 snmpd M net/ipv4/neigh/eth0/retrans_time_ms | ||
0.003192 1643 snmpd M ipv4/neigh/eth0/retrans_time_ms | ||
0.003197 1643 snmpd M neigh/eth0/retrans_time_ms | ||
0.003203 1643 snmpd M eth0/retrans_time_ms | ||
0.003206 1643 snmpd M retrans_time_ms | ||
0.003245 1643 snmpd M ipv6/neigh/eth0/retrans_time_ms | ||
0.003249 1643 snmpd M neigh/eth0/retrans_time_ms | ||
0.003252 1643 snmpd M eth0/retrans_time_ms | ||
0.003255 1643 snmpd M retrans_time_ms | ||
0.003287 1643 snmpd M conf/eth0/forwarding | ||
0.003292 1643 snmpd M eth0/forwarding | ||
0.003295 1643 snmpd M forwarding | ||
0.003326 1643 snmpd M base_reachable_time_ms | ||
[...] | ||
|
||
I ran a drop caches at the same time as executing this tool. The output shows | ||
the processes, the type of event ("T" column: M == miss, R == reference), | ||
and the filename for the dcache lookup. | ||
|
||
The way the dcache is currently implemented, each component of a path is | ||
checked in turn. The first line, showing "net/dev" from snmp, will be a lookup | ||
for "net" in a directory (that isn't shown here). If it finds "net", it will | ||
then lookup "dev" inside net. You can see this sequence a little later, | ||
starting at time 0.003180, where a pathname is being searched | ||
directory by directory. | ||
|
||
|
||
The -a option will show all lookups, although be warned, the output will be | ||
very verbose. For example: | ||
|
||
# ./dcsnoop | ||
TIME(s) PID COMM T FILE | ||
0.000000 20279 dcsnoop.py M p_lookup_fast | ||
0.000010 20279 dcsnoop.py M enable | ||
0.000013 20279 dcsnoop.py M id | ||
0.000015 20279 dcsnoop.py M filter | ||
0.000017 20279 dcsnoop.py M trigger | ||
0.000019 20279 dcsnoop.py M format | ||
0.006148 20279 dcsnoop.py R sys/kernel/debug/tracing/trace_pipe | ||
0.006158 20279 dcsnoop.py R kernel/debug/tracing/trace_pipe | ||
0.006161 20279 dcsnoop.py R debug/tracing/trace_pipe | ||
0.006164 20279 dcsnoop.py R tracing/trace_pipe | ||
0.006166 20279 dcsnoop.py R trace_pipe | ||
0.015900 1643 snmpd R proc/sys/net/ipv6/conf/lo/forwarding | ||
0.015901 1643 snmpd R sys/net/ipv6/conf/lo/forwarding | ||
0.015901 1643 snmpd R net/ipv6/conf/lo/forwarding | ||
0.015902 1643 snmpd R ipv6/conf/lo/forwarding | ||
0.015903 1643 snmpd R conf/lo/forwarding | ||
0.015904 1643 snmpd R lo/forwarding | ||
0.015905 1643 snmpd M lo/forwarding | ||
0.015908 1643 snmpd R forwarding | ||
0.015909 1643 snmpd M forwarding | ||
0.015937 1643 snmpd R proc/sys/net/ipv6/neigh/lo/base_reachable_time_ms | ||
0.015937 1643 snmpd R sys/net/ipv6/neigh/lo/base_reachable_time_ms | ||
0.015938 1643 snmpd R net/ipv6/neigh/lo/base_reachable_time_ms | ||
0.015939 1643 snmpd R ipv6/neigh/lo/base_reachable_time_ms | ||
0.015940 1643 snmpd R neigh/lo/base_reachable_time_ms | ||
0.015941 1643 snmpd R lo/base_reachable_time_ms | ||
0.015941 1643 snmpd R base_reachable_time_ms | ||
0.015943 1643 snmpd M base_reachable_time_ms | ||
0.043569 1876 supervise M 20281 | ||
0.043573 1886 supervise M 20280 | ||
0.043582 1886 supervise R supervise/status.new | ||
[...] | ||
|
||
|
||
USAGE message: | ||
|
||
# ./dcsnoop.py -h | ||
usage: dcsnoop.py [-h] [-a] | ||
|
||
Trace directory entry cache (dcache) lookups | ||
|
||
optional arguments: | ||
-h, --help show this help message and exit | ||
-a, --all trace all lookups (default is fails only) | ||
|
||
examples: | ||
./dcsnoop # trace failed dcache lookups | ||
./dcsnoop -a # trace all dcache lookups |