diff --git a/README.md b/README.md index 59d8e090c9b8..e5af60e3f5fe 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,7 @@ pair of .c and .py files, and some are directories of files. - tools/[tcpdrop](tools/tcpdrop.py): Trace kernel-based TCP packet drops with details. [Examples](tools/tcpdrop_example.txt). - tools/[tcplife](tools/tcplife.py): Trace TCP sessions and summarize lifespan. [Examples](tools/tcplife_example.txt). - tools/[tcpretrans](tools/tcpretrans.py): Trace TCP retransmits and TLPs. [Examples](tools/tcpretrans_example.txt). +- tools/[tcprtt](tools/tcprtt.py): Trace TCP round trip time. [Examples](tools/tcprtt_example.txt). - tools/[tcpstates](tools/tcpstates.py): Trace TCP session state changes with durations. [Examples](tools/tcpstates_example.txt). - tools/[tcpsubnet](tools/tcpsubnet.py): Summarize and aggregate TCP send by subnet. [Examples](tools/tcpsubnet_example.txt). - tools/[tcpsynbl](tools/tcpsynbl.py): Show TCP SYN backlog. [Examples](tools/tcpsynbl_example.txt). diff --git a/man/man8/tcprtt.8 b/man/man8/tcprtt.8 index 5c3bb89b9ca3..729a1abb1a78 100644 --- a/man/man8/tcprtt.8 +++ b/man/man8/tcprtt.8 @@ -2,7 +2,7 @@ .SH NAME tcprtt \- Trace TCP RTT of established connections. Uses Linux eBPF/bcc. .SH SYNOPSIS -.B tcprtt [\-h] [\-T] [\-D] [\-m] [\-i INTERVAL] [\-d DURATION] +.B tcprtt [\-h] [\-T] [\-D] [\-m] [\-i INTERVAL] [\-d DURATION] [\-p LPORT] [\-P RPORT] [\-a LADDR] [\-A RADDR] [\-b] [\-B] .SH DESCRIPTION This tool traces established connections RTT(round-trip time) to analyze the quality of network. This can be useful for general troubleshooting to @@ -31,17 +31,23 @@ Print output every interval seconds. \-d DURATION Total duration of trace in seconds. .TP -\-p SPORT -Filter for source port. +\-p LPORT +Filter for local port. .TP -\-P DPORT -Filter for destination port. +\-P RPORT +Filter for remote port. .TP -\-a SADDR -Filter for source address. +\-a LADDR +Filter for local address. .TP -\-A DADDR -Filter for destination address. +\-A RADDR +Filter for remote address. 
+.TP +\-b +Show sockets histogram by local address. +.TP +\-B +Show sockets histogram by remote address. .SH EXAMPLES .TP Trace TCP RTT and print 1 second summaries, 10 times: @@ -52,9 +58,13 @@ Summarize in millisecond, and timestamps: # .B tcprtt \-m \-T .TP -Only trace TCP RTT for destination address 192.168.1.100 and destination port 80: +Only trace TCP RTT for remote address 192.168.1.100 and remote port 80: +# +.B tcprtt \-i 1 \-d 10 \-A 192.168.1.100 \-P 80 +.TP +Trace local port 80 and show a breakdown of RTT by remote host: # -.B tcprtt \-i 1 \-d 10 -A 192.168.1.100 -P 80 +.B tcprtt \-i 3 \-\-lport 80 \-\-byraddr .SH OVERHEAD This traces the kernel tcp_rcv_established function and collects TCP RTT. The rate of this depends on your server application. If it is a web or proxy server diff --git a/tools/tcprtt.py b/tools/tcprtt.py index 81832cf0f81c..155ccffb2a83 100755 --- a/tools/tcprtt.py +++ b/tools/tcprtt.py @@ -4,7 +4,7 @@ # tcprtt Summarize TCP RTT as a histogram. For Linux, uses BCC, eBPF. 
# # USAGE: tcprtt [-h] [-T] [-D] [-m] [-i INTERVAL] [-d DURATION] -# [-p SPORT] [-P DPORT] [-a SADDR] [-A DADDR] +# [-p LPORT] [-P RPORT] [-a LADDR] [-A RADDR] [-b] [-B] # # Copyright (c) 2020 zhenwei pi # Licensed under the Apache License, Version 2.0 (the "License") @@ -14,6 +14,7 @@ from __future__ import print_function from bcc import BPF from time import sleep, strftime +from socket import inet_ntop, AF_INET import socket, struct import argparse @@ -22,10 +23,12 @@ ./tcprtt # summarize TCP RTT ./tcprtt -i 1 -d 10 # print 1 second summaries, 10 times ./tcprtt -m -T # summarize in millisecond, and timestamps - ./tcprtt -p # filter for source port - ./tcprtt -P # filter for destination port - ./tcprtt -a # filter for source address - ./tcprtt -A # filter for destination address + ./tcprtt -p # filter for local port + ./tcprtt -P # filter for remote port + ./tcprtt -a # filter for local address + ./tcprtt -A # filter for remote address + ./tcprtt -b # show sockets histogram by local address + ./tcprtt -B # show sockets histogram by remote address ./tcprtt -D # show debug bpf text """ parser = argparse.ArgumentParser( @@ -40,14 +43,18 @@ help="include timestamp on output") parser.add_argument("-m", "--milliseconds", action="store_true", help="millisecond histogram") -parser.add_argument("-p", "--sport", - help="source port") -parser.add_argument("-P", "--dport", - help="destination port") -parser.add_argument("-a", "--saddr", - help="source address") -parser.add_argument("-A", "--daddr", - help="destination address") +parser.add_argument("-p", "--lport", + help="filter for local port") +parser.add_argument("-P", "--rport", + help="filter for remote port") +parser.add_argument("-a", "--laddr", + help="filter for local address") +parser.add_argument("-A", "--raddr", + help="filter for remote address") +parser.add_argument("-b", "--byladdr", action="store_true", + help="show sockets histogram by local address") +parser.add_argument("-B", "--byraddr", 
action="store_true", + help="show sockets histogram by remote address") parser.add_argument("-D", "--debug", action="store_true", help="print BPF program before starting (for debugging purposes)") parser.add_argument("--ebpf", action="store_true", @@ -67,65 +74,72 @@ #include #include -BPF_HISTOGRAM(hist_srtt); +typedef struct sock_key { + u64 addr; + u64 slot; +} sock_key_t; + +STORAGE int trace_tcp_rcv(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb) { struct tcp_sock *ts = tcp_sk(sk); u32 srtt = ts->srtt_us >> 3; const struct inet_sock *inet = inet_sk(sk); + u16 sport = 0; + u16 dport = 0; + u32 saddr = 0; + u32 daddr = 0; + + bpf_probe_read_kernel(&sport, sizeof(sport), (void *)&inet->inet_sport); + bpf_probe_read_kernel(&dport, sizeof(dport), (void *)&inet->inet_dport); + bpf_probe_read_kernel(&saddr, sizeof(saddr), (void *)&inet->inet_saddr); + bpf_probe_read_kernel(&daddr, sizeof(daddr), (void *)&inet->inet_daddr); + + LPORTFILTER + RPORTFILTER + LADDRFILTER + RADDRFILTER - SPORTFILTER - DPORTFILTER - SADDRFILTER - DADDRFILTER FACTOR - hist_srtt.increment(bpf_log2l(srtt)); + STORE return 0; } """ -# filter for source port -if args.sport: - bpf_text = bpf_text.replace(b'SPORTFILTER', - b"""u16 sport = 0; - bpf_probe_read_kernel(&sport, sizeof(sport), (void *)&inet->inet_sport); - if (ntohs(sport) != %d) - return 0;""" % int(args.sport)) +# filter for local port +if args.lport: + bpf_text = bpf_text.replace(b'LPORTFILTER', + b"""if (ntohs(sport) != %d) + return 0;""" % int(args.lport)) else: - bpf_text = bpf_text.replace(b'SPORTFILTER', b'') + bpf_text = bpf_text.replace(b'LPORTFILTER', b'') -# filter for dest port -if args.dport: - bpf_text = bpf_text.replace(b'DPORTFILTER', - b"""u16 dport = 0; - bpf_probe_read_kernel(&dport, sizeof(dport), (void *)&inet->inet_dport); - if (ntohs(dport) != %d) - return 0;""" % int(args.dport)) +# filter for remote port +if args.rport: + bpf_text = bpf_text.replace(b'RPORTFILTER', + b"""if (ntohs(dport) != %d) + 
return 0;""" % int(args.rport)) else: - bpf_text = bpf_text.replace(b'DPORTFILTER', b'') + bpf_text = bpf_text.replace(b'RPORTFILTER', b'') -# filter for source address -if args.saddr: - bpf_text = bpf_text.replace(b'SADDRFILTER', - b"""u32 saddr = 0; - bpf_probe_read_kernel(&saddr, sizeof(saddr), (void *)&inet->inet_saddr); - if (saddr != %d) - return 0;""" % struct.unpack("=I", socket.inet_aton(args.saddr))[0]) +# filter for local address +if args.laddr: + bpf_text = bpf_text.replace(b'LADDRFILTER', + b"""if (saddr != %d) + return 0;""" % struct.unpack("=I", socket.inet_aton(args.laddr))[0]) else: - bpf_text = bpf_text.replace(b'SADDRFILTER', b'') + bpf_text = bpf_text.replace(b'LADDRFILTER', b'') -# filter for source address -if args.daddr: - bpf_text = bpf_text.replace(b'DADDRFILTER', - b"""u32 daddr = 0; - bpf_probe_read_kernel(&daddr, sizeof(daddr), (void *)&inet->inet_daddr); - if (daddr != %d) - return 0;""" % struct.unpack("=I", socket.inet_aton(args.daddr))[0]) +# filter for remote address +if args.raddr: + bpf_text = bpf_text.replace(b'RADDRFILTER', + b"""if (daddr != %d) + return 0;""" % struct.unpack("=I", socket.inet_aton(args.raddr))[0]) else: - bpf_text = bpf_text.replace(b'DADDRFILTER', b'') + bpf_text = bpf_text.replace(b'RADDRFILTER', b'') # show msecs or usecs[default] if args.milliseconds: @@ -135,6 +149,30 @@ bpf_text = bpf_text.replace('FACTOR', '') label = "usecs" +print_header = "srtt" +# show byladdr/byraddr histogram +if args.byladdr: + bpf_text = bpf_text.replace('STORAGE', + 'BPF_HISTOGRAM(hist_srtt, sock_key_t);') + bpf_text = bpf_text.replace('STORE', + b"""sock_key_t key; + key.addr = saddr; + key.slot = bpf_log2l(srtt); + hist_srtt.increment(key);""") + print_header = "Local Address: " +elif args.byraddr: + bpf_text = bpf_text.replace('STORAGE', + 'BPF_HISTOGRAM(hist_srtt, sock_key_t);') + bpf_text = bpf_text.replace('STORE', + b"""sock_key_t key; + key.addr = daddr; + key.slot = bpf_log2l(srtt); + hist_srtt.increment(key);""") + 
print_header = "Remote Address: " +else: + bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(hist_srtt);') + bpf_text = bpf_text.replace('STORE', 'hist_srtt.increment(bpf_log2l(srtt));') + # debug/dump ebpf enable or not if args.debug or args.ebpf: print(bpf_text) @@ -147,6 +185,12 @@ print("Tracing TCP RTT... Hit Ctrl-C to end.") +def print_section(addr): + if args.byladdr: + return inet_ntop(AF_INET, struct.pack("I", addr)).encode() + elif args.byraddr: + return inet_ntop(AF_INET, struct.pack("I", addr)).encode() + # output exiting = 0 if args.interval else 1 dist = b.get_table("hist_srtt") @@ -162,7 +206,7 @@ if args.timestamp: print("%-8s\n" % strftime("%H:%M:%S"), end="") - dist.print_log2_hist(label, "srtt") + dist.print_log2_hist(label, section_header=print_header, section_print_fn=print_section) dist.clear() if exiting or seconds >= args.duration: diff --git a/tools/tcprtt_example.txt b/tools/tcprtt_example.txt index 9a3d2356b327..a5e6ed5c87ad 100644 --- a/tools/tcprtt_example.txt +++ b/tools/tcprtt_example.txt @@ -40,16 +40,57 @@ also shows unstable TCP RTT. So in this situation, we need to make sure the quality of network is good or not firstly. -Use filter for address and(or) port. Ex, only collect source address 192.168.122.200 -and destination address 192.168.122.100 and destination port 80. +Use filters for address and/or port. E.g., only collect local address 192.168.122.200 +and remote address 192.168.122.100 and remote port 80. # ./tcprtt -i 1 -d 10 -m -a 192.168.122.200 -A 192.168.122.100 -P 80 +When tracing on the server side, show each client with its own histogram. +For example, run tcprtt on a storage node to show initiators' RTT histogram: +# ./tcprtt -i 1 -m --lport 3260 --byraddr +Tracing TCP RTT... Hit Ctrl-C to end. 
+ +Remote Address: = 10.131.90.16 + msecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 0 | | + 8 -> 15 : 2 |****************************************| + +Remote Address: = 10.131.90.13 + msecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 4 |************************** | + 8 -> 15 : 6 |****************************************| + +Remote Address: = 10.131.89.153 + msecs : count distribution + 0 -> 1 : 120 |****************************************| + 2 -> 3 : 31 |********** | + 4 -> 7 : 32 |********** | + +Remote Address: = 10.131.89.150 + msecs : count distribution + 0 -> 1 : 12 |****************************************| + 2 -> 3 : 12 |****************************************| + 4 -> 7 : 9 |****************************** | + 8 -> 15 : 3 |********** | + +Remote Address: = 10.131.89.148 + msecs : count distribution + 0 -> 1 : 0 | | + 2 -> 3 : 0 | | + 4 -> 7 : 4 |****************************************| + +.... + + Full USAGE: # ./tcprtt -h -usage: tcprtt [-h] [-i INTERVAL] [-d DURATION] [-T] [-m] [-p SPORT] - [-P DPORT] [-a SADDR] [-A DADDR] [-D] +usage: tcprtt.py [-h] [-i INTERVAL] [-d DURATION] [-T] [-m] [-p LPORT] + [-P RPORT] [-a LADDR] [-A RADDR] [-b] [-B] [-D] Summarize TCP RTT as a histogram @@ -61,14 +102,16 @@ optional arguments: total duration of trace, seconds -T, --timestamp include timestamp on output -m, --milliseconds millisecond histogram - -p SPORT, --sport SPORT - source port - -P DPORT, --dport DPORT - destination port - -a SADDR, --saddr SADDR - source address - -A DADDR, --daddr DADDR - destination address + -p LPORT, --lport LPORT + filter for local port + -P RPORT, --rport RPORT + filter for remote port + -a LADDR, --laddr LADDR + filter for local address + -A RADDR, --raddr RADDR + filter for remote address + -b, --byladdr show sockets histogram by local address + -B, --byraddr show sockets histogram by remote address -D, --debug print BPF program before starting (for debugging purposes) @@ -76,8 +119,10 
@@ examples: ./tcprtt # summarize TCP RTT ./tcprtt -i 1 -d 10 # print 1 second summaries, 10 times ./tcprtt -m -T # summarize in millisecond, and timestamps - ./tcprtt -p # filter for source port - ./tcprtt -P # filter for destination port - ./tcprtt -a # filter for source address - ./tcprtt -A # filter for destination address + ./tcprtt -p # filter for local port + ./tcprtt -P # filter for remote port + ./tcprtt -a # filter for local address + ./tcprtt -A # filter for remote address + ./tcprtt -b # show sockets histogram by local address + ./tcprtt -B # show sockets histogram by remote address ./tcprtt -D # show debug bpf text