#!/usr/bin/python
#
# gethostlatency  Show latency for getaddrinfo/gethostbyname[2] calls.
#                 For Linux, uses BCC, eBPF. Embedded C.
#
# This can be useful for identifying DNS latency, by identifying which
# remote host name lookups were slow, and by how much.
#
# This uses dynamic tracing of user-level functions and registers, and may
# need modifications to match your software and processor architecture.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 28-Jan-2016   Brendan Gregg   Created this.
# 30-Mar-2016   Allan McAleavy  updated for BPF_PERF_OUTPUT

from __future__ import print_function
from bcc import BPF
from time import strftime
import argparse
import ctypes as ct

# command-line interface
examples = """examples:
    ./gethostlatency           # time getaddrinfo/gethostbyname[2] calls
    ./gethostlatency -p 181    # only trace PID 181
"""
parser = argparse.ArgumentParser(
    description="Show latency for getaddrinfo/gethostbyname[2] calls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", help="trace this PID only", type=int,
                    default=-1)
# hidden flag: print the generated BPF program and exit without tracing
parser.add_argument("--ebpf", action="store_true",
                    help=argparse.SUPPRESS)
args = parser.parse_args()

# load BPF program
#
# do_entry runs when a resolver function is entered: it records the calling
# task's name, the requested host string (first function argument) and a
# timestamp in the "start" hash. do_return runs when the function returns:
# it looks the entry back up, computes the elapsed time and pushes a data_t
# record to user space through the "events" perf buffer.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct val_t {
    u32 pid;
    char comm[TASK_COMM_LEN];
    char host[80];
    u64 ts;
};

struct data_t {
    u32 pid;
    u64 delta;
    char comm[TASK_COMM_LEN];
    char host[80];
};

BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(events);

int do_entry(struct pt_regs *ctx) {
    // nothing to record when the host name argument is NULL
    if (!PT_REGS_PARM1(ctx))
        return 0;

    struct val_t val = {};
    // truncation to u32 keeps the low half of the pid_tgid value, so
    // in-flight calls are keyed per calling thread
    u32 pid = bpf_get_current_pid_tgid();

    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        bpf_probe_read(&val.host, sizeof(val.host),
                       (void *)PT_REGS_PARM1(ctx));
        val.pid = bpf_get_current_pid_tgid();
        val.ts = bpf_ktime_get_ns();
        start.update(&pid, &val);
    }

    return 0;
}

int do_return(struct pt_regs *ctx) {
    struct val_t *valp;
    struct data_t data = {};
    u32 pid = bpf_get_current_pid_tgid();

    u64 tsp = bpf_ktime_get_ns();

    valp = start.lookup(&pid);
    if (valp == 0)
        return 0;       // missed start

    bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
    bpf_probe_read(&data.host, sizeof(data.host), (void *)valp->host);
    data.pid = valp->pid;
    data.delta = tsp - valp->ts;
    events.perf_submit(ctx, &data, sizeof(data));
    start.delete(&pid);
    return 0;
}
"""
if args.ebpf:
    print(bpf_text)
    exit()

# instrument entry and return of each resolver function in libc
b = BPF(text=bpf_text)
b.attach_uprobe(name="c", sym="getaddrinfo", fn_name="do_entry", pid=args.pid)
b.attach_uprobe(name="c", sym="gethostbyname", fn_name="do_entry",
                pid=args.pid)
b.attach_uprobe(name="c", sym="gethostbyname2", fn_name="do_entry",
                pid=args.pid)
b.attach_uretprobe(name="c", sym="getaddrinfo", fn_name="do_return",
                   pid=args.pid)
b.attach_uretprobe(name="c", sym="gethostbyname", fn_name="do_return",
                   pid=args.pid)
b.attach_uretprobe(name="c", sym="gethostbyname2", fn_name="do_return",
                   pid=args.pid)

TASK_COMM_LEN = 16    # linux/sched.h


class Data(ct.Structure):
    """User-space mirror of the BPF program's ``struct data_t``.

    Field order and types must match the C declaration. ``pid`` is a 32-bit
    value in C; ctypes applies native alignment, so the 4 padding bytes
    before the 64-bit ``delta`` are inserted automatically.
    """
    _fields_ = [
        ("pid", ct.c_uint),
        ("delta", ct.c_ulonglong),
        ("comm", ct.c_char * TASK_COMM_LEN),
        ("host", ct.c_char * 80),
    ]


# header
print("%-9s %-6s %-16s %10s %s" % ("TIME", "PID", "COMM", "LATms", "HOST"))


def print_event(cpu, data, size):
    """Perf-buffer callback: print one completed lookup as a table row.

    ``data`` is the raw pointer handed over by the perf buffer; it is cast
    to ``Data``. ``cpu`` and ``size`` are part of the callback signature and
    are not used here. ``delta`` is in nanoseconds and is shown as
    milliseconds.
    """
    event = ct.cast(data, ct.POINTER(Data)).contents
    # comm/host are raw bytes copied from the traced process; decode
    # leniently so an unexpected byte sequence cannot abort the callback
    print("%-9s %-6d %-16s %10.2f %s" % (
        strftime("%H:%M:%S"),
        event.pid,
        event.comm.decode('utf-8', 'replace'),
        float(event.delta) / 1000000,
        event.host.decode('utf-8', 'replace')))


# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
while 1:
    try:
        # NOTE(review): newer bcc releases appear to name this call
        # perf_buffer_poll(); kprobe_poll() is kept for compatibility with
        # the bcc version this script was written against - confirm.
        b.kprobe_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop tracing; leave without traceback
        exit()