Skip to content

Commit

Permalink
add tcpdrop tool
Browse files Browse the repository at this point in the history
  • Loading branch information
brendangregg committed May 30, 2018
1 parent 84900f8 commit 4f13d88
Show file tree
Hide file tree
Showing 5 changed files with 426 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ pair of .c and .py files, and some are directories of files.
- tools/[tcpaccept](tools/tcpaccept.py): Trace TCP passive connections (accept()). [Examples](tools/tcpaccept_example.txt).
- tools/[tcpconnect](tools/tcpconnect.py): Trace TCP active connections (connect()). [Examples](tools/tcpconnect_example.txt).
- tools/[tcpconnlat](tools/tcpconnlat.py): Trace TCP active connection latency (connect()). [Examples](tools/tcpconnlat_example.txt).
- tools/[tcpdrop](tools/tcpdrop.py): Trace kernel-based TCP packet drops with details. [Examples](tools/tcpdrop_example.txt).
- tools/[tcplife](tools/tcplife.py): Trace TCP sessions and summarize lifespan. [Examples](tools/tcplife_example.txt).
- tools/[tcpretrans](tools/tcpretrans.py): Trace TCP retransmits and TLPs. [Examples](tools/tcpretrans_example.txt).
- tools/[tcpstates](tools/tcpstates.py): Trace TCP session state changes with durations. [Examples](tools/tcpstates_example.txt).
Expand Down
73 changes: 73 additions & 0 deletions man/man8/tcpdrop.8
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
.TH tcpdrop 8 "2018-05-30" "USER COMMANDS"
.SH NAME
tcpdrop \- Trace kernel-based TCP packet drops with details. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B tcpdrop [\-h]
.SH DESCRIPTION
This tool traces TCP packets or segments that were dropped by the kernel, and
shows details from the IP and TCP headers, the socket state, and the
kernel stack trace. This is useful for debugging cases of high kernel drops,
which can cause timer-based retransmits and performance issues.

This tool works using dynamic tracing of the tcp_drop() kernel function,
which requires a recent kernel version.

Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH OPTIONS
.TP
\-h
Print usage message.
.SH EXAMPLES
.TP
Trace kernel TCP drops:
#
.B tcpdrop
.SH FIELDS
.TP
TIME
Time of the drop, in HH:MM:SS format.
.TP
PID
Process ID that was on-CPU during the drop. This may be unrelated, as drops
can occur on the receive interrupt and be unrelated to the PID that was
interrupted.
.TP
IP
IP address family (4 or 6)
.TP
SADDR
Source IP address.
.TP
SPORT
Source TCP port.
.TP
DADDR
Destination IP address.
.TP
DPORT
Destination TCP port.
.TP
STATE
TCP session state ("ESTABLISHED", etc).
.TP
FLAGS
TCP flags ("SYN", etc).
.SH OVERHEAD
This traces the kernel tcp_drop() function, which should be low frequency,
and therefore the overhead of this tool should be negligible.

As always, test and understand this tool's overhead for your types of
workloads before production use.
.SH SOURCE
This is from bcc.
.IP
https://github.com/iovisor/bcc
.PP
Also look in the bcc distribution for a companion _examples.txt file containing
example usage, output, and commentary for this tool.
.SH OS
Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
Brendan Gregg
.SH SEE ALSO
tcplife(8), tcpaccept(8), tcpconnect(8), tcptop(8)
58 changes: 58 additions & 0 deletions src/python/bcc/tcp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright 2018 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# from include/net/tcp_states.h:
# Maps the numeric TCP socket state to its symbolic name.
tcpstate = {
    1: 'ESTABLISHED',
    2: 'SYN_SENT',
    3: 'SYN_RECV',
    4: 'FIN_WAIT1',
    5: 'FIN_WAIT2',
    6: 'TIME_WAIT',
    7: 'CLOSE',
    8: 'CLOSE_WAIT',
    9: 'LAST_ACK',
    10: 'LISTEN',
    11: 'CLOSING',
    12: 'NEW_SYN_RECV',
}

# from include/net/tcp.h:
# One bit per TCP header flag, lowest bit first.
TCPHDR_FIN = 1 << 0  # 0x01
TCPHDR_SYN = 1 << 1  # 0x02
TCPHDR_RST = 1 << 2  # 0x04
TCPHDR_PSH = 1 << 3  # 0x08
TCPHDR_ACK = 1 << 4  # 0x10
TCPHDR_URG = 1 << 5  # 0x20
TCPHDR_ECE = 1 << 6  # 0x40
TCPHDR_CWR = 1 << 7  # 0x80

def flags2str(flags):
    """Return the names of the TCP flags set in *flags*, joined by "|".

    Names are emitted in the fixed order FIN, SYN, RST, PSH, ACK, URG, ECE,
    CWR. When none of those bits is set the result is an empty string.
    """
    known = (
        (TCPHDR_FIN, "FIN"),
        (TCPHDR_SYN, "SYN"),
        (TCPHDR_RST, "RST"),
        (TCPHDR_PSH, "PSH"),
        (TCPHDR_ACK, "ACK"),
        (TCPHDR_URG, "URG"),
        (TCPHDR_ECE, "ECE"),
        (TCPHDR_CWR, "CWR"),
    )
    return "|".join([label for bit, label in known if flags & bit])
222 changes: 222 additions & 0 deletions tools/tcpdrop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# tcpdrop Trace TCP kernel-dropped packets/segments.
# For Linux, uses BCC, eBPF. Embedded C.
#
# This provides information such as packet details, socket state, and kernel
# stack trace for packets/segments that were dropped via tcp_drop().
#
# USAGE: tcpdrop [-h]
#
# This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes.
#
# Copyright 2018 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 30-May-2018 Brendan Gregg Created this.

from __future__ import print_function
from bcc import BPF
import argparse
from time import strftime
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
import ctypes as ct
from time import sleep
from bcc import tcp

# arguments
# The epilog is shown verbatim below the generated --help text.
examples = """examples:
./tcpdrop # trace kernel TCP drops
"""
parser = argparse.ArgumentParser(
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Trace TCP drops by the kernel")
# --ebpf is hidden from --help; it is consulted after the BPF program text
# has been defined.
parser.add_argument("--ebpf", help=argparse.SUPPRESS, action="store_true")
args = parser.parse_args()
# Set to a true value to print the BPF program text before loading it.
debug = 0

# define BPF program
#
# The embedded C attaches to tcp_drop(sk, skb). For every call with a non-NULL
# socket it reads the TCP ports and flag byte from the packet headers, the
# address family and state from the socket, records the kernel stack id, and
# submits one record on ipv4_events or ipv6_events depending on the family.
#
# NOTE(review): the IPv6 branch takes its addresses from the socket
# (skc_v6_rcv_saddr / skc_v6_daddr) while the ports come from the packet
# header, unlike the IPv4 branch which reads both from the packet. Confirm the
# source/destination pairing is what is intended.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/ip.h>
#include <net/sock.h>
#include <bcc/proto.h>

BPF_STACK_TRACE(stack_traces, 1024);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    // XXX: switch some to u32's when supported
    u64 stack_id;
    u64 pid;
    u64 ip;
    u64 saddr;
    u64 daddr;
    u64 sport;
    u64 dport;
    u64 state;
    u64 tcpflags;
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 stack_id;
    u64 pid;
    u64 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 sport;
    u64 dport;
    u64 state;
    u64 tcpflags;
};
BPF_PERF_OUTPUT(ipv6_events);

static struct tcphdr *skb_to_tcphdr(const struct sk_buff *skb)
{
    // unstable API. verify logic in tcp_hdr() -> skb_transport_header().
    return (struct tcphdr *)(skb->head + skb->transport_header);
}

static inline struct iphdr *skb_to_iphdr(const struct sk_buff *skb)
{
    // unstable API. verify logic in ip_hdr() -> skb_network_header().
    return (struct iphdr *)(skb->head + skb->network_header);
}

// from include/net/tcp.h:
#ifndef tcp_flag_byte
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
#endif

int trace_tcp_drop(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb)
{
    if (sk == NULL)
        return 0;

    // the process id (tgid) is in the upper 32 bits; the lower 32 bits
    // hold the thread id
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // pull in details from the packet headers and the sock struct
    u16 family = sk->__sk_common.skc_family;
    char state = sk->__sk_common.skc_state;
    u16 sport = 0, dport = 0;
    u8 tcpflags = 0;
    struct tcphdr *tcp = skb_to_tcphdr(skb);
    struct iphdr *ip = skb_to_iphdr(skb);
    bpf_probe_read(&sport, sizeof(sport), &tcp->source);
    bpf_probe_read(&dport, sizeof(dport), &tcp->dest);
    bpf_probe_read(&tcpflags, sizeof(tcpflags), &tcp_flag_byte(tcp));
    sport = ntohs(sport);
    dport = ntohs(dport);

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.pid = pid, .ip = 4};
        bpf_probe_read(&data4.saddr, sizeof(u32), &ip->saddr);
        bpf_probe_read(&data4.daddr, sizeof(u32), &ip->daddr);
        data4.dport = dport;
        data4.sport = sport;
        data4.tcpflags = tcpflags;
        data4.state = state;
        data4.stack_id = stack_traces.get_stackid(ctx, 0);
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else if (family == AF_INET6) {
        struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        data6.dport = dport;
        data6.sport = sport;
        data6.tcpflags = tcpflags;
        data6.state = state;
        data6.stack_id = stack_traces.get_stackid(ctx, 0);
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }
    // else drop

    return 0;
}
"""

# Print the BPF program text when debugging or when --ebpf was given; with
# --ebpf the script stops here.
if args.ebpf or debug:
    print(bpf_text)
    if args.ebpf:
        exit()

# event data
class Data_ipv4(ct.Structure):
    """ctypes view of one record read from the ipv4_events perf buffer.

    Field order and widths follow struct ipv4_data_t in the BPF program:
    nine consecutive unsigned 64-bit values.
    """
    _fields_ = [
        ("stack_id", ct.c_ulonglong),   # key into the stack_traces table
        ("pid", ct.c_ulonglong),
        ("ip", ct.c_ulonglong),         # address family: 4
        ("saddr", ct.c_ulonglong),      # only 4 bytes are written by the probe
        ("daddr", ct.c_ulonglong),      # only 4 bytes are written by the probe
        ("sport", ct.c_ulonglong),
        ("dport", ct.c_ulonglong),
        ("state", ct.c_ulonglong),      # numeric TCP state
        ("tcpflags", ct.c_ulonglong),   # TCP header flag byte
    ]

class Data_ipv6(ct.Structure):
    """ctypes view of one record read from the ipv6_events perf buffer.

    Mirrors struct ipv6_data_t in the BPF program; each 128-bit address is
    represented as a pair of unsigned 64-bit values.
    """
    # NOTE(review): the C struct places an unsigned __int128 after three u64
    # members. If the compiler aligns __int128 to 16 bytes, the C side has
    # 8 bytes of padding before saddr that this layout does not -- confirm
    # the offsets match on the target.
    _fields_ = [
        ("stack_id", ct.c_ulonglong),       # key into the stack_traces table
        ("pid", ct.c_ulonglong),
        ("ip", ct.c_ulonglong),             # address family: 6
        ("saddr", (ct.c_ulonglong * 2)),    # 16 raw address bytes
        ("daddr", (ct.c_ulonglong * 2)),    # 16 raw address bytes
        ("sport", ct.c_ulonglong),
        ("dport", ct.c_ulonglong),
        ("state", ct.c_ulonglong),          # numeric TCP state
        ("tcpflags", ct.c_ulonglong),       # TCP header flag byte
    ]

# process event
def _print_event(event, saddr, daddr):
    """Print one drop record followed by its kernel stack trace.

    event is the decoded perf record (Data_ipv4 or Data_ipv6); saddr and
    daddr are the already-formatted source and destination address strings.
    """
    # Fall back to the raw number so an unexpected state value cannot raise
    # KeyError inside the perf buffer callback.
    state = tcp.tcpstate.get(event.state, str(event.state))
    print("%-8s %-6d %-2d %-20s > %-20s %s (%s)" % (
        strftime("%H:%M:%S"), event.pid, event.ip,
        "%s:%d" % (saddr, event.sport),
        "%s:%d" % (daddr, event.dport),
        state, tcp.flags2str(event.tcpflags)))
    # One line per kernel frame, then a blank line to separate records.
    for addr in stack_traces.walk(event.stack_id):
        sym = b.ksym(addr, show_offset=True)
        print("\t%s" % sym)
    print("")


def print_ipv4_event(cpu, data, size):
    """Perf buffer callback for ipv4_events: decode and print one drop.

    cpu and size are supplied by the perf buffer and are not used; data
    points at the raw record, which is interpreted as a Data_ipv4.
    """
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    # The address is stored in a 64-bit field; pack it back into 4 bytes
    # for inet_ntop().
    _print_event(
        event,
        inet_ntop(AF_INET, pack('I', event.saddr)),
        inet_ntop(AF_INET, pack('I', event.daddr)))


def print_ipv6_event(cpu, data, size):
    """Perf buffer callback for ipv6_events: decode and print one drop.

    cpu and size are supplied by the perf buffer and are not used; data
    points at the raw record, which is interpreted as a Data_ipv6.
    """
    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    _print_event(
        event,
        inet_ntop(AF_INET6, event.saddr),
        inet_ntop(AF_INET6, event.daddr))

# initialize BPF
# Compile and load the program, then attach it to tcp_drop() if that kernel
# function is available for kprobes.
b = BPF(text=bpf_text)
if b.get_kprobe_functions("tcp_drop"):
    b.attach_kprobe(event="tcp_drop", fn_name="trace_tcp_drop")
else:
    print("ERROR: tcp_drop() kernel function not found or traceable. "
        "Older kernel versions not supported.")
    # Report the failure to the caller with a non-zero exit status.
    exit(1)
# Stack ids carried in the events are resolved through this table.
stack_traces = b.get_table("stack_traces")

# header
# Column widths match the per-event lines printed by the event callbacks.
print("%-8s %-6s %-2s %-20s > %-20s %s (%s)" % (
    "TIME", "PID", "IP", "SADDR:SPORT", "DADDR:DPORT", "STATE", "FLAGS"))

# read events
# Register one callback per address family, then poll until interrupted.
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is how tracing is stopped; leave without a traceback.
        exit()
Loading

0 comments on commit 4f13d88

Please sign in to comment.