forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
84900f8
commit 4f13d88
Showing
5 changed files
with
426 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
.TH tcpdrop 8 "2018-05-30" "USER COMMANDS" | ||
.SH NAME | ||
tcpdrop \- Trace kernel-based TCP packet drops with details. Uses Linux eBPF/bcc. | ||
.SH SYNOPSIS | ||
.B tcpdrop [\-h] [\-T] [\-t] [\-w] [\-s] [\-p PID] [\-D PORTS] [\-L PORTS] | ||
.SH DESCRIPTION | ||
This tool traces TCP packets or segments that were dropped by the kernel, and | ||
shows details from the IP and TCP headers, the socket state, and the | ||
kernel stack trace. This is useful for debugging cases of high kernel drops, | ||
which can cause timer-based retransmits and performance issues. | ||
|
||
This tool works using dynamic tracing of the tcp_drop() kernel function, | ||
which requires a recent kernel version. | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH OPTIONS | ||
.TP | ||
\-h | ||
Print usage message. | ||
.B tcpdrop | ||
.SH FIELDS | ||
.TP | ||
TIME | ||
Time of the drop, in HH:MM:SS format. | ||
.TP | ||
PID | ||
Process ID that was on-CPU during the drop. This may be unrelated, as drops | ||
can occur on the receive interrupt and be unrelated to the PID that was | ||
interrupted. | ||
.TP | ||
IP | ||
IP address family (4 or 6) | ||
.TP | ||
SADDR | ||
Source IP address. | ||
.TP | ||
SPORT | ||
Source TCP port. | ||
.TP | ||
DADDR | ||
Destination IP address. | ||
.TP | ||
DPORT | ||
Destionation TCP port. | ||
.TP | ||
STATE | ||
TCP session state ("ESTABLISHED", etc). | ||
.TP | ||
FLAGS | ||
TCP flags ("SYN", etc). | ||
.SH OVERHEAD | ||
This traces the kernel tcp_drop() function, which should be low frequency, | ||
and therefore the overhead of this tool should be negligible. | ||
|
||
As always, test and understand this tools overhead for your types of | ||
workloads before production use. | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Brendan Gregg | ||
.SH SEE ALSO | ||
tcplife(8), tcpaccept(8), tcpconnect(8), tcptop(8) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Copyright 2018 Netflix, Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# from include/net/tcp_states.h: | ||
tcpstate = {} | ||
tcpstate[1] = 'ESTABLISHED' | ||
tcpstate[2] = 'SYN_SENT' | ||
tcpstate[3] = 'SYN_RECV' | ||
tcpstate[4] = 'FIN_WAIT1' | ||
tcpstate[5] = 'FIN_WAIT2' | ||
tcpstate[6] = 'TIME_WAIT' | ||
tcpstate[7] = 'CLOSE' | ||
tcpstate[8] = 'CLOSE_WAIT' | ||
tcpstate[9] = 'LAST_ACK' | ||
tcpstate[10] = 'LISTEN' | ||
tcpstate[11] = 'CLOSING' | ||
tcpstate[12] = 'NEW_SYN_RECV' | ||
|
||
# from include/net/tcp.h: | ||
TCPHDR_FIN = 0x01; | ||
TCPHDR_SYN = 0x02; | ||
TCPHDR_RST = 0x04; | ||
TCPHDR_PSH = 0x08; | ||
TCPHDR_ACK = 0x10; | ||
TCPHDR_URG = 0x20; | ||
TCPHDR_ECE = 0x40; | ||
TCPHDR_CWR = 0x80; | ||
|
||
def flags2str(flags): | ||
arr = []; | ||
if flags & TCPHDR_FIN: | ||
arr.append("FIN"); | ||
if flags & TCPHDR_SYN: | ||
arr.append("SYN"); | ||
if flags & TCPHDR_RST: | ||
arr.append("RST"); | ||
if flags & TCPHDR_PSH: | ||
arr.append("PSH"); | ||
if flags & TCPHDR_ACK: | ||
arr.append("ACK"); | ||
if flags & TCPHDR_URG: | ||
arr.append("URG"); | ||
if flags & TCPHDR_ECE: | ||
arr.append("ECE"); | ||
if flags & TCPHDR_CWR: | ||
arr.append("CWR"); | ||
return "|".join(arr); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
# | ||
# tcpdrop Trace TCP kernel-dropped packets/segments. | ||
# For Linux, uses BCC, eBPF. Embedded C. | ||
# | ||
# This provides information such as packet details, socket state, and kernel | ||
# stack trace for packets/segments that were dropped via tcp_drop(). | ||
# | ||
# USAGE: tcpdrop [-c] [-h] [-l] | ||
# | ||
# This uses dynamic tracing of kernel functions, and will need to be updated | ||
# to match kernel changes. | ||
# | ||
# Copyright 2018 Netflix, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 30-May-2018 Brendan Gregg Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF | ||
import argparse | ||
from time import strftime | ||
from socket import inet_ntop, AF_INET, AF_INET6 | ||
from struct import pack | ||
import ctypes as ct | ||
from time import sleep | ||
from bcc import tcp | ||
|
||
# arguments | ||
examples = """examples: | ||
./tcpdrop # trace kernel TCP drops | ||
""" | ||
parser = argparse.ArgumentParser( | ||
description="Trace TCP drops by the kernel", | ||
formatter_class=argparse.RawDescriptionHelpFormatter, | ||
epilog=examples) | ||
parser.add_argument("--ebpf", action="store_true", | ||
help=argparse.SUPPRESS) | ||
args = parser.parse_args() | ||
debug = 0 | ||
|
||
# define BPF program | ||
bpf_text = """ | ||
#include <uapi/linux/ptrace.h> | ||
#include <uapi/linux/tcp.h> | ||
#include <uapi/linux/ip.h> | ||
#include <net/sock.h> | ||
#include <bcc/proto.h> | ||
BPF_STACK_TRACE(stack_traces, 1024); | ||
// separate data structs for ipv4 and ipv6 | ||
struct ipv4_data_t { | ||
// XXX: switch some to u32's when supported | ||
u64 stack_id; | ||
u64 pid; | ||
u64 ip; | ||
u64 saddr; | ||
u64 daddr; | ||
u64 sport; | ||
u64 dport; | ||
u64 state; | ||
u64 tcpflags; | ||
}; | ||
BPF_PERF_OUTPUT(ipv4_events); | ||
struct ipv6_data_t { | ||
u64 stack_id; | ||
u64 pid; | ||
u64 ip; | ||
unsigned __int128 saddr; | ||
unsigned __int128 daddr; | ||
u64 sport; | ||
u64 dport; | ||
u64 state; | ||
u64 tcpflags; | ||
}; | ||
BPF_PERF_OUTPUT(ipv6_events); | ||
static struct tcphdr *skb_to_tcphdr(const struct sk_buff *skb) | ||
{ | ||
// unstable API. verify logic in tcp_hdr() -> skb_transport_header(). | ||
return (struct tcphdr *)(skb->head + skb->transport_header); | ||
} | ||
static inline struct iphdr *skb_to_iphdr(const struct sk_buff *skb) | ||
{ | ||
// unstable API. verify logic in ip_hdr() -> skb_network_header(). | ||
return (struct iphdr *)(skb->head + skb->network_header); | ||
} | ||
// from include/net/tcp.h: | ||
#ifndef tcp_flag_byte | ||
#define tcp_flag_byte(th) (((u_int8_t *)th)[13]) | ||
#endif | ||
int trace_tcp_drop(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb) | ||
{ | ||
if (sk == NULL) | ||
return 0; | ||
u32 pid = bpf_get_current_pid_tgid(); | ||
// pull in details from the packet headers and the sock struct | ||
u16 family = sk->__sk_common.skc_family; | ||
char state = sk->__sk_common.skc_state; | ||
u16 sport = 0, dport = 0; | ||
u8 tcpflags = 0; | ||
struct tcphdr *tcp = skb_to_tcphdr(skb); | ||
struct iphdr *ip = skb_to_iphdr(skb); | ||
bpf_probe_read(&sport, sizeof(sport), &tcp->source); | ||
bpf_probe_read(&dport, sizeof(dport), &tcp->dest); | ||
bpf_probe_read(&tcpflags, sizeof(tcpflags), &tcp_flag_byte(tcp)); | ||
sport = ntohs(sport); | ||
dport = ntohs(dport); | ||
if (family == AF_INET) { | ||
struct ipv4_data_t data4 = {.pid = pid, .ip = 4}; | ||
bpf_probe_read(&data4.saddr, sizeof(u32), &ip->saddr); | ||
bpf_probe_read(&data4.daddr, sizeof(u32), &ip->daddr); | ||
data4.dport = dport; | ||
data4.sport = sport; | ||
data4.tcpflags = tcpflags; | ||
data4.state = state; | ||
data4.stack_id = stack_traces.get_stackid(ctx, 0); | ||
ipv4_events.perf_submit(ctx, &data4, sizeof(data4)); | ||
} else if (family == AF_INET6) { | ||
struct ipv6_data_t data6 = {.pid = pid, .ip = 6}; | ||
bpf_probe_read(&data6.saddr, sizeof(data6.saddr), | ||
sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); | ||
bpf_probe_read(&data6.daddr, sizeof(data6.daddr), | ||
sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); | ||
data6.dport = dport; | ||
data6.sport = sport; | ||
data6.tcpflags = tcpflags; | ||
data6.state = state; | ||
data6.stack_id = stack_traces.get_stackid(ctx, 0); | ||
ipv6_events.perf_submit(ctx, &data6, sizeof(data6)); | ||
} | ||
// else drop | ||
return 0; | ||
} | ||
""" | ||
|
||
if debug or args.ebpf: | ||
print(bpf_text) | ||
if args.ebpf: | ||
exit() | ||
|
||
# event data | ||
class Data_ipv4(ct.Structure): | ||
_fields_ = [ | ||
("stack_id", ct.c_ulonglong), | ||
("pid", ct.c_ulonglong), | ||
("ip", ct.c_ulonglong), | ||
("saddr", ct.c_ulonglong), | ||
("daddr", ct.c_ulonglong), | ||
("sport", ct.c_ulonglong), | ||
("dport", ct.c_ulonglong), | ||
("state", ct.c_ulonglong), | ||
("tcpflags", ct.c_ulonglong) | ||
] | ||
|
||
class Data_ipv6(ct.Structure): | ||
_fields_ = [ | ||
("stack_id", ct.c_ulonglong), | ||
("pid", ct.c_ulonglong), | ||
("ip", ct.c_ulonglong), | ||
("saddr", (ct.c_ulonglong * 2)), | ||
("daddr", (ct.c_ulonglong * 2)), | ||
("sport", ct.c_ulonglong), | ||
("dport", ct.c_ulonglong), | ||
("state", ct.c_ulonglong), | ||
("tcpflags", ct.c_ulonglong) | ||
] | ||
|
||
# process event | ||
def print_ipv4_event(cpu, data, size): | ||
event = ct.cast(data, ct.POINTER(Data_ipv4)).contents | ||
print("%-8s %-6d %-2d %-20s > %-20s %s (%s)" % ( | ||
strftime("%H:%M:%S"), event.pid, event.ip, | ||
"%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.sport), | ||
"%s:%s" % (inet_ntop(AF_INET, pack('I', event.daddr)), event.dport), | ||
tcp.tcpstate[event.state], tcp.flags2str(event.tcpflags))) | ||
for addr in stack_traces.walk(event.stack_id): | ||
sym = b.ksym(addr, show_offset=True) | ||
print("\t%s" % sym) | ||
print("") | ||
|
||
def print_ipv6_event(cpu, data, size): | ||
event = ct.cast(data, ct.POINTER(Data_ipv6)).contents | ||
print("%-8s %-6d %-2d %-20s > %-20s %s (%s)" % ( | ||
strftime("%H:%M:%S"), event.pid, event.ip, | ||
"%s:%d" % (inet_ntop(AF_INET6, event.saddr), event.sport), | ||
"%s:%d" % (inet_ntop(AF_INET6, event.daddr), event.dport), | ||
tcp.tcpstate[event.state], tcp.flags2str(event.tcpflags))) | ||
for addr in stack_traces.walk(event.stack_id): | ||
sym = b.ksym(addr, show_offset=True) | ||
print("\t%s" % sym) | ||
print("") | ||
|
||
# initialize BPF | ||
b = BPF(text=bpf_text) | ||
if b.get_kprobe_functions("tcp_drop"): | ||
b.attach_kprobe(event="tcp_drop", fn_name="trace_tcp_drop") | ||
else: | ||
print("ERROR: tcp_drop() kernel function not found or traceable. " | ||
"Older kernel versions not supported.") | ||
exit() | ||
stack_traces = b.get_table("stack_traces") | ||
|
||
# header | ||
print("%-8s %-6s %-2s %-20s > %-20s %s (%s)" % ("TIME", "PID", "IP", | ||
"SADDR:SPORT", "DADDR:DPORT", "STATE", "FLAGS")) | ||
|
||
# read events | ||
b["ipv4_events"].open_perf_buffer(print_ipv4_event) | ||
b["ipv6_events"].open_perf_buffer(print_ipv6_event) | ||
while 1: | ||
b.perf_buffer_poll() |
Oops, something went wrong.