Skip to content

Commit

Permalink
migrated gethostlatency to use bpf_perf_event
Browse files Browse the repository at this point in the history
  • Loading branch information
mcaleavya committed Apr 2, 2016
1 parent 31571b4 commit a9b886c
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 15 deletions.
4 changes: 4 additions & 0 deletions man/man8/gethostlatency.8
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ latency of the call (duration) in milliseconds, and the host string.
This tool can be useful for identifying DNS latency, by identifying which
remote host name lookups were slow, and by how much.

This makes use of a Linux 4.5 feature (bpf_perf_event_output());
for kernels older than 4.5, see the version under tools/old,
which uses an older mechanism

This tool currently uses dynamic tracing of user-level functions and registers,
and may need modifications to match your software and processor architecture.

Expand Down
85 changes: 70 additions & 15 deletions tools/gethostlatency.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,43 +13,71 @@
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 28-Jan-2016 Brendan Gregg Created this.
# 30-Mar-2016 Allan McAleavy updated for BPF_PERF_OUTPUT

from __future__ import print_function
from bcc import BPF
from time import strftime
import ctypes as ct

# load BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
struct val_t {
u32 pid;
char comm[TASK_COMM_LEN];
char host[80];
u64 ts;
};
struct data_t {
u32 pid;
u64 ts;
u64 delta;
char comm[TASK_COMM_LEN];
char host[80];
};
BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(events);
int do_entry(struct pt_regs *ctx) {
if (!ctx->di)
return 0;
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
bpf_probe_read(&val.host, sizeof(val.host), (void *)ctx->di);
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
bpf_probe_read(&val.host, sizeof(val.host), (void *)ctx->di);
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
}
return 0;
}
int do_return(struct pt_regs *ctx) {
struct val_t *valp;
struct data_t data = {};
u64 delta;
u32 pid = bpf_get_current_pid_tgid();
u64 tsp = bpf_ktime_get_ns();
valp = start.lookup(&pid);
if (valp == 0)
return 0; // missed start
delta = (bpf_ktime_get_ns() - valp->ts) / 1000;
bpf_trace_printk("%d %s\\n", delta, valp->host);
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
bpf_probe_read(&data.host, sizeof(data.host), (void *)valp->host);
data.pid = valp->pid;
data.delta = tsp - valp->ts;
data.ts = tsp / 1000;
events.perf_submit(ctx, &data, sizeof(data));
start.delete(&pid);
return 0;
}
Expand All @@ -62,16 +90,43 @@
b.attach_uretprobe(name="c", sym="gethostbyname", fn_name="do_return")
b.attach_uretprobe(name="c", sym="gethostbyname2", fn_name="do_return")

TASK_COMM_LEN = 16 # linux/sched.h

class Data(ct.Structure):
_fields_ = [
("pid", ct.c_ulonglong),
("ts", ct.c_ulonglong),
("delta", ct.c_ulonglong),
("comm", ct.c_char * TASK_COMM_LEN),
("host", ct.c_char * 80)
]

start_ts = 0
prev_ts = 0
delta = 0

# header
print("%-9s %-6s %-12s %6s %s" % ("TIME", "PID", "COMM", "LATms", "HOST"))
print("%-9s %-6s %-16s %10s %s" % ("TIME", "PID", "COMM", "LATms", "HOST"))

def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
global start_ts
global prev_ts
global delta

if start_ts == 0:
prev_ts = start_ts

if start_ts == 1:
delta = float(delta) + (event.ts - prev_ts)

print("%-9s %-6d %-16s %10.2f %s" % (strftime("%H:%M:%S"), event.pid,
event.comm, (event.delta / 1000000), event.host))

prev_ts = event.ts
start_ts = 1

# format output
# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
while 1:
try:
(task, pid, cpu, flags, ts, msg) = b.trace_fields()
except ValueError:
continue
(delta, host) = msg.split(" ")
deltams = int(delta) / 1000
print("%-9s %-6d %-12.12s %6.2f %s" % (strftime("%H:%M:%S"), pid, task,
deltams, host))
b.kprobe_poll()
77 changes: 77 additions & 0 deletions tools/old/gethostlatency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/python
#
# gethostlatency Show latency for getaddrinfo/gethostbyname[2] calls.
# For Linux, uses BCC, eBPF. Embedded C.
#
# This can be useful for identifying DNS latency, by identifying which
# remote host name lookups were slow, and by how much.
#
# This uses dynamic tracing of user-level functions and registers, and may
# need modifications to match your software and processor architecture.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 28-Jan-2016 Brendan Gregg Created this.

from __future__ import print_function
from bcc import BPF
from time import strftime

# load BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
struct val_t {
char host[80];
u64 ts;
};
BPF_HASH(start, u32, struct val_t);
int do_entry(struct pt_regs *ctx) {
if (!ctx->di)
return 0;
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
bpf_probe_read(&val.host, sizeof(val.host), (void *)ctx->di);
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
return 0;
}
int do_return(struct pt_regs *ctx) {
struct val_t *valp;
u64 delta;
u32 pid = bpf_get_current_pid_tgid();
valp = start.lookup(&pid);
if (valp == 0)
return 0; // missed start
delta = (bpf_ktime_get_ns() - valp->ts) / 1000;
bpf_trace_printk("%d %s\\n", delta, valp->host);
start.delete(&pid);
return 0;
}
"""
b = BPF(text=bpf_text)
b.attach_uprobe(name="c", sym="getaddrinfo", fn_name="do_entry")
b.attach_uprobe(name="c", sym="gethostbyname", fn_name="do_entry")
b.attach_uprobe(name="c", sym="gethostbyname2", fn_name="do_entry")
b.attach_uretprobe(name="c", sym="getaddrinfo", fn_name="do_return")
b.attach_uretprobe(name="c", sym="gethostbyname", fn_name="do_return")
b.attach_uretprobe(name="c", sym="gethostbyname2", fn_name="do_return")

# header
print("%-9s %-6s %-12s %6s %s" % ("TIME", "PID", "COMM", "LATms", "HOST"))

# format output
while 1:
try:
(task, pid, cpu, flags, ts, msg) = b.trace_fields()
except ValueError:
continue
(delta, host) = msg.split(" ")
deltams = int(delta) / 1000
print("%-9s %-6d %-12.12s %6.2f %s" % (strftime("%H:%M:%S"), pid, task,
deltams, host))

0 comments on commit a9b886c

Please sign in to comment.