diff --git a/Dockerfile.tests b/Dockerfile.tests index 251eb5317249..a8456a91e70b 100644 --- a/Dockerfile.tests +++ b/Dockerfile.tests @@ -50,8 +50,8 @@ RUN apt-get update && apt-get install -y \ libtinfo5 \ libtinfo-dev -RUN pip3 install pyroute2 netaddr -RUN pip install pyroute2 netaddr +RUN pip3 install pyroute2 netaddr dnslib cachetools +RUN pip install pyroute2 netaddr dnslib cachetools # FIXME this is faster than building from source, but it seems there is a bug # in probing libruby.so rather than ruby binary diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu index 1aeb84182ee5..cfe353d84749 100644 --- a/Dockerfile.ubuntu +++ b/Dockerfile.ubuntu @@ -23,5 +23,5 @@ COPY --from=builder /root/bcc/*.deb /root/bcc/ RUN \ apt-get update -y && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y python python3 binutils libelf1 kmod && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y python python3 binutils libelf1 kmod python-dnslib python-cachetools python3-dnslib python3-cachetools && \ dpkg -i /root/bcc/*.deb diff --git a/INSTALL.md b/INSTALL.md index 76ac16f98faf..99328f49a90e 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -325,7 +325,7 @@ apt-get -t jessie-backports install linux-base linux-image-4.9.0-0.bpo.2-amd64 l apt-get install debhelper cmake libllvm3.8 llvm-3.8-dev libclang-3.8-dev \ libelf-dev bison flex libedit-dev clang-format-3.8 python python-netaddr \ python-pyroute2 luajit libluajit-5.1-dev arping iperf netperf ethtool \ - devscripts zlib1g-dev libfl-dev + devscripts zlib1g-dev libfl-dev python-dnslib python-cachetools ``` #### Sudo @@ -419,7 +419,7 @@ popd ``` sudo dnf install -y bison cmake ethtool flex git iperf libstdc++-static \ python-netaddr python-pip gcc gcc-c++ make zlib-devel \ - elfutils-libelf-devel + elfutils-libelf-devel python-cachetools sudo dnf install -y luajit luajit-devel # for Lua support sudo dnf install -y \ http://repo.iovisor.org/yum/extra/mageia/cauldron/x86_64/netperf-2.7.0-1.mga6.x86_64.rpm @@ -548,7 +548,7 @@ 
Tested on Amazon Linux AMI release 2018.03 (kernel 4.14.47-56.37.amzn1.x86_64) # enable epel to get iperf, luajit, luajit-devel, cmake3 (cmake3 is required to support c++11) sudo yum-config-manager --enable epel -sudo yum install -y bison cmake3 ethtool flex git iperf libstdc++-static python-netaddr gcc gcc-c++ make zlib-devel elfutils-libelf-devel +sudo yum install -y bison cmake3 ethtool flex git iperf libstdc++-static python-netaddr python-cachetools gcc gcc-c++ make zlib-devel elfutils-libelf-devel sudo yum install -y luajit luajit-devel sudo yum install -y http://repo.iovisor.org/yum/extra/mageia/cauldron/x86_64/netperf-2.7.0-1.mga6.x86_64.rpm sudo pip install pyroute2 @@ -590,7 +590,7 @@ sudo /usr/share/bcc/tools/execsnoop # enable epel to get iperf, luajit, luajit-devel, cmake3 (cmake3 is required to support c++11) sudo yum-config-manager --enable epel -sudo yum install -y bison cmake3 ethtool flex git iperf libstdc++-static python-netaddr gcc gcc-c++ make zlib-devel elfutils-libelf-devel +sudo yum install -y bison cmake3 ethtool flex git iperf libstdc++-static python-netaddr python-cachetools gcc gcc-c++ make zlib-devel elfutils-libelf-devel sudo yum install -y luajit luajit-devel sudo yum install -y http://repo.iovisor.org/yum/extra/mageia/cauldron/x86_64/netperf-2.7.0-1.mga6.x86_64.rpm sudo pip install pyroute2 diff --git a/man/man8/tcpconnect.8 b/man/man8/tcpconnect.8 index c96058b0a2e4..e298dec4e626 100644 --- a/man/man8/tcpconnect.8 +++ b/man/man8/tcpconnect.8 @@ -2,7 +2,7 @@ .SH NAME tcpconnect \- Trace TCP active connections (connect()). Uses Linux eBPF/bcc. .SH SYNOPSIS -.B tcpconnect [\-h] [\-c] [\-t] [\-p PID] [-P PORT] [-u UID] [-U] [\-\-cgroupmap MAPPATH] [\-\-mntnsmap MAPPATH] +.B tcpconnect [\-h] [\-c] [\-t] [\-p PID] [-P PORT] [-u UID] [-U] [\-\-cgroupmap MAPPATH] [\-\-mntnsmap MAPPATH] [\-d] .SH DESCRIPTION This tool traces active TCP connections (eg, via a connect() syscall; accept() are passive connections). 
This can be useful for general @@ -14,9 +14,18 @@ This works by tracing the kernel tcp_v4_connect() and tcp_v6_connect() functions using dynamic tracing, and will need updating to match any changes to these functions. +When provided with the \-d or \-\-dns option, this tool will also correlate +connect calls with the most recent DNS query that matches the IP connected. +This feature works by tracing the kernel udp_recvmsg() function to collect DNS +responses. + Since this uses BPF, only the root user can use this tool. .SH REQUIREMENTS CONFIG_BPF and bcc. + +If using the \-d or \-\-dns option, you must have the +dnslib and cachetools python packages installed. You can install them with pip3 or with +apt on Ubuntu 18.04+ using the python3\-dnslib and python3\-cachetools packages. .SH OPTIONS .TP \-h @@ -45,6 +54,26 @@ Trace cgroups in this BPF map only (filtered in-kernel). .TP \--mntnsmap MAPPATH Trace mount namespaces in this BPF map only (filtered in-kernel). +.TP +\-d +Shows the most recent DNS query for the IP address in the connect call. +This is likely related to the TCP connection details in the other columns, but is not guaranteed. +This +feature works by tracing the udp_recvmsg kernel function and tracking DNS +responses received by the server. It only supports UDP DNS packets up to 512 bytes +in length. The python code keeps a cache of 10k DNS responses in memory +for up to 24 hours. + +If the time difference in milliseconds +between when the system received a DNS response and when a +connect syscall was traced using an IP in that DNS response is greater than 100ms, +this tool will report this delta after the query. +These deltas should be relatively short for most applications. A +long delay between the response and connect could be either anomalous activity +or indicate a misattribution between the DNS name requested and the IP that +the connect syscall is using.
+ +The \-d option may not be used with the count feature (option \-c). .SH EXAMPLES .TP Trace all active TCP connections: @@ -55,6 +84,10 @@ Trace all TCP connects, and include timestamps: # .B tcpconnect \-t .TP +Trace all TCP connects, and include the most recent matching DNS query for each connected IP: +# +.B tcpconnect \-d +.TP Trace PID 181 only: # .B tcpconnect \-p 181 @@ -110,12 +143,27 @@ Destination port .TP CONNECTS Accumulated active connections since start. +.TP +QUERY +Shows the most recent DNS query for the IP address in the connect call. +This is likely related to the TCP connection details in the other columns, but is not guaranteed. .SH OVERHEAD This traces the kernel tcp_v[46]_connect functions and prints output for each event. As the rate of this is generally expected to be low (< 1000/s), the overhead is also expected to be negligible. If you have an application that is calling a high rate of connect()s, such as a proxy server, then test and understand this overhead before use. + +If you are using the \-d option to track DNS requests, this tool will trace the +udp_recvmsg function and generate an event for any packets from UDP port 53. +This event contains up to 512 bytes of the UDP packet payload. +Typical applications do not extensively use UDP, so the performance overhead of +tracing udp_recvmsg is +expected to be negligible. However, if you have an application that receives +many UDP packets, then you should test and understand the overhead of tracing +every received UDP message. Furthermore, performance overhead of running +this tool on a DNS server is expected to be higher than average because all +DNS response packets will be copied to userspace. .SH SOURCE This is from bcc.
.IP diff --git a/scripts/bpf_demo.ks.erb b/scripts/bpf_demo.ks.erb index a32f0b69b403..ac68698fae15 100644 --- a/scripts/bpf_demo.ks.erb +++ b/scripts/bpf_demo.ks.erb @@ -35,6 +35,7 @@ kexec-tools cmake libstdc++-static python-netaddr +python-cachetools python-futures %end diff --git a/scripts/build-rpm.sh b/scripts/build-rpm.sh index 81cc2d124610..18d8280dd7e6 100755 --- a/scripts/build-rpm.sh +++ b/scripts/build-rpm.sh @@ -10,6 +10,17 @@ function cleanup() { } trap cleanup EXIT +# install python dependencies for test (os-release is sourced in a subshell so only the major VERSION_ID is kept; python-cachetools is requested only below release 30) +if [ -f /etc/os-release ]; then + VERSION_ID=$(. /etc/os-release && printf '%s' "${VERSION_ID%%.*}") +fi +if [[ ${VERSION_ID:-0} -lt 30 ]]; then + PKGS="python3-cachetools python-cachetools" +else + PKGS="python3-cachetools" +fi +sudo dnf install -y $PKGS + mkdir $TMP/{BUILD,RPMS,SOURCES,SPECS,SRPMS} llvmver=3.7.1 diff --git a/tools/tcpconnect.py b/tools/tcpconnect.py index 7c2cea1264b5..acdf176723dc 100755 --- a/tools/tcpconnect.py +++ b/tools/tcpconnect.py @@ -18,6 +18,7 @@ # 14-Feb-2016 " " Switch to bpf_perf_output. # 09-Jan-2019 Takuma Kume Support filtering by UID # 30-Jul-2019 Xiaozhou Liu Count connects.
+# 07-Oct-2020 Nabil Schear Correlate connects with DNS responses from __future__ import print_function from bcc import BPF @@ -27,11 +28,13 @@ from socket import inet_ntop, ntohs, AF_INET, AF_INET6 from struct import pack from time import sleep +from datetime import datetime # arguments examples = """examples: ./tcpconnect # trace all TCP connect()s ./tcpconnect -t # include timestamps + ./tcpconnect -d # include DNS queries associated with connects ./tcpconnect -p 181 # only trace PID 181 ./tcpconnect -P 80 # only trace port 80 ./tcpconnect -P 80,81 # only trace port 80 and 81 @@ -61,6 +64,8 @@ help="trace cgroups in this BPF map only") parser.add_argument("--mntnsmap", help="trace mount namespaces in this BPF map only") +parser.add_argument("-d", "--dns", action="store_true", + help="include likely DNS query associated with each connect") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) args = parser.parse_args() @@ -182,15 +187,15 @@ } """ -struct_init = { 'ipv4': - { 'count' : +struct_init = {'ipv4': + {'count': """ struct ipv4_flow_key_t flow_key = {}; flow_key.saddr = skp->__sk_common.skc_rcv_saddr; flow_key.daddr = skp->__sk_common.skc_daddr; flow_key.dport = ntohs(dport); ipv4_count.increment(flow_key);""", - 'trace' : + 'trace': """ struct ipv4_data_t data4 = {.pid = pid, .ip = ipver}; data4.uid = bpf_get_current_uid_gid(); @@ -202,7 +207,7 @@ ipv4_events.perf_submit(ctx, &data4, sizeof(data4));""" }, 'ipv6': - { 'count' : + {'count': """ struct ipv6_flow_key_t flow_key = {}; bpf_probe_read_kernel(&flow_key.saddr, sizeof(flow_key.saddr), @@ -211,7 +216,7 @@ skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32); flow_key.dport = ntohs(dport); ipv6_count.increment(flow_key);""", - 'trace' : + 'trace': """ struct ipv6_data_t data6 = {.pid = pid, .ip = ipver}; data6.uid = bpf_get_current_uid_gid(); @@ -224,7 +229,86 @@ bpf_get_current_comm(&data6.task, sizeof(data6.task)); ipv6_events.perf_submit(ctx, &data6, sizeof(data6));""" } - } + } + 
+# This defines an additional BPF program that instruments the udp_recvmsg +# kernel function to locate DNS response packets on UDP port 53. When these +# packets are located, the data is copied to user-space where python will +# parse them with dnslib. +# +# uses a percpu array of length 1 to store the dns_data_t off the stack to +# allow for a maximum DNS packet length of 512 bytes. +dns_bpf_text = """ +#include <net/inet_sock.h> + +#define MAX_PKT 512 +struct dns_data_t { + u8 pkt[MAX_PKT]; +}; + +BPF_PERF_OUTPUT(dns_events); + +// store msghdr pointer captured on syscall entry to parse on syscall return +BPF_HASH(tbl_udp_msg_hdr, u64, struct msghdr *); + +// single element per-cpu array to hold the current event off the stack +BPF_PERCPU_ARRAY(dns_data,struct dns_data_t,1); + +int trace_udp_recvmsg(struct pt_regs *ctx) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + struct sock *sk = (struct sock *)PT_REGS_PARM1(ctx); + struct inet_sock *is = inet_sk(sk); + + // only grab port 53 packets, 13568 is ntohs(53) + if (is->inet_dport == 13568) { + struct msghdr *msghdr = (struct msghdr *)PT_REGS_PARM2(ctx); + tbl_udp_msg_hdr.update(&pid_tgid, &msghdr); + } + return 0; +} + +int trace_udp_ret_recvmsg(struct pt_regs *ctx) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + u32 zero = 0; + struct msghdr **msgpp = tbl_udp_msg_hdr.lookup(&pid_tgid); + if (msgpp == 0) + return 0; + + struct msghdr *msghdr = (struct msghdr *)*msgpp; + if (msghdr->msg_iter.type != ITER_IOVEC) + goto delete_and_return; + + int copied = (int)PT_REGS_RC(ctx); + if (copied < 0) + goto delete_and_return; + size_t buflen = (size_t)copied; + + if (buflen > msghdr->msg_iter.iov->iov_len) + goto delete_and_return; + + if (buflen > MAX_PKT) + buflen = MAX_PKT; + + struct dns_data_t *data = dns_data.lookup(&zero); + if (!data) // this should never happen, just making the verifier happy + goto delete_and_return; + + void *iovbase = msghdr->msg_iter.iov->iov_base; + bpf_probe_read(data->pkt, buflen, iovbase); + dns_events.perf_submit(ctx,
data, buflen); + +delete_and_return: + tbl_udp_msg_hdr.delete(&pid_tgid); + return 0; +} + +""" + +if args.count and args.dns: + print("Error: you may not specify -d/--dns with -c/--count.") + exit(1) # code substitutions if args.count: @@ -251,6 +335,9 @@ bpf_text = bpf_text.replace('FILTER_PORT', '') bpf_text = bpf_text.replace('FILTER_UID', '') +if args.dns: + bpf_text += dns_bpf_text + if debug or args.ebpf: print(bpf_text) if args.ebpf: @@ -266,10 +353,11 @@ def print_ipv4_event(cpu, data, size): printb(b"%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), nl="") if args.print_uid: printb(b"%-6d" % event.uid, nl="") - printb(b"%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, + dest_ip = inet_ntop(AF_INET, pack("I", event.daddr)).encode() + printb(b"%-6d %-12.12s %-2d %-16s %-16s %-6d %s" % (event.pid, event.task, event.ip, inet_ntop(AF_INET, pack("I", event.saddr)).encode(), - inet_ntop(AF_INET, pack("I", event.daddr)).encode(), event.dport)) + dest_ip, event.dport, print_dns(dest_ip))) def print_ipv6_event(cpu, data, size): event = b["ipv6_events"].event(data) @@ -280,22 +368,97 @@ def print_ipv6_event(cpu, data, size): printb(b"%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), nl="") if args.print_uid: printb(b"%-6d" % event.uid, nl="") - printb(b"%-6d %-12.12s %-2d %-16s %-16s %-4d" % (event.pid, + dest_ip = inet_ntop(AF_INET6, event.daddr).encode() + printb(b"%-6d %-12.12s %-2d %-16s %-16s %-6d %s" % (event.pid, event.task, event.ip, - inet_ntop(AF_INET6, event.saddr).encode(), inet_ntop(AF_INET6, event.daddr).encode(), - event.dport)) + inet_ntop(AF_INET6, event.saddr).encode(), dest_ip, + event.dport, print_dns(dest_ip))) def depict_cnt(counts_tab, l3prot='ipv4'): - for k, v in sorted(counts_tab.items(), key=lambda counts: counts[1].value, reverse=True): + for k, v in sorted(counts_tab.items(), + key=lambda counts: counts[1].value, reverse=True): depict_key = "" if l3prot == 'ipv4': - depict_key = "%-25s %-25s %-20s" % ((inet_ntop(AF_INET,
pack('I', k.saddr))), - inet_ntop(AF_INET, pack('I', k.daddr)), k.dport) + depict_key = "%-25s %-25s %-20s" % \ + ((inet_ntop(AF_INET, pack('I', k.saddr))), + inet_ntop(AF_INET, pack('I', k.daddr)), k.dport) else: - depict_key = "%-25s %-25s %-20s" % ((inet_ntop(AF_INET6, k.saddr)), - inet_ntop(AF_INET6, k.daddr), k.dport) - - print ("%s %-10d" % (depict_key, v.value)) + depict_key = "%-25s %-25s %-20s" % \ + ((inet_ntop(AF_INET6, k.saddr)), + inet_ntop(AF_INET6, k.daddr), k.dport) + + print("%s %-10d" % (depict_key, v.value)) + +def print_dns(dest_ip): + if not args.dns: + return b"" + + dnsname, timestamp = dns_cache.get(dest_ip, (None, None)) + if timestamp is not None: + diff = datetime.now() - timestamp + diff = diff.total_seconds() * 1000 + else: + diff = 0 + if dnsname is None: + dnsname = b"No DNS Query" + if dest_ip == b"127.0.0.1" or dest_ip == b"::1": + dnsname = b"localhost" + retval = b"%s" % dnsname + if diff > DELAY_DNS: + retval += b" (%.3fms)" % diff + return retval + +if args.dns: + try: + import dnslib + from cachetools import TTLCache + except ImportError: + print("Error: The python packages dnslib and cachetools are required " + "to use the -d/--dns option.") + print("Install this package with:") + print("\t$ pip3 install dnslib cachetools") + print(" or") + print("\t$ sudo apt-get install python3-dnslib python3-cachetools " + "(on Ubuntu 18.04+)") + exit(1) + + # 24 hours + DEFAULT_TTL = 86400 + + # Cache Size in entries + DNS_CACHE_SIZE = 10240 + + # delay in ms in which to warn users of long delay between the query + # and the connect that used the IP + DELAY_DNS = 100 + + dns_cache = TTLCache(maxsize=DNS_CACHE_SIZE, ttl=DEFAULT_TTL) + + # process event + def save_dns(cpu, data, size): + event = b["dns_events"].event(data) + payload = event.pkt[:size] + + # pass the payload to dnslib for parsing + dnspkt = dnslib.DNSRecord.parse(payload) + # lets only look at responses + if dnspkt.header.qr != 1: + return + #
must be some questions in there + if dnspkt.header.q != 1: + return + # make sure there are answers (header.aa is only the authoritative flag) + if dnspkt.header.a == 0: + return + + # lop off the trailing . + question = ("%s" % dnspkt.q.qname)[:-1].encode('utf-8') + + for answer in dnspkt.rr: + # skip all but A and AAAA records + if answer.rtype == 1 or answer.rtype == 28: + dns_cache[str(answer.rdata).encode('utf-8')] = (question, + datetime.now()) # initialize BPF b = BPF(text=bpf_text) @@ -303,11 +466,14 @@ def depict_cnt(counts_tab, l3prot='ipv4'): b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_entry") b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return") b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return") +if args.dns: + b.attach_kprobe(event="udp_recvmsg", fn_name="trace_udp_recvmsg") + b.attach_kretprobe(event="udp_recvmsg", fn_name="trace_udp_ret_recvmsg") print("Tracing connect ... Hit Ctrl-C to end") if args.count: try: - while 1: + while True: sleep(99999999) except KeyboardInterrupt: pass @@ -324,15 +490,21 @@ def depict_cnt(counts_tab, l3prot='ipv4'): print("%-9s" % ("TIME(s)"), end="") if args.print_uid: print("%-6s" % ("UID"), end="") - print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "SADDR", - "DADDR", "DPORT")) + print("%-6s %-12s %-2s %-16s %-16s %-6s" % ("PID", "COMM", "IP", "SADDR", + "DADDR", "DPORT"), end="") + if args.dns: + print(" QUERY") + else: + print() start_ts = 0 # read events b["ipv4_events"].open_perf_buffer(print_ipv4_event) b["ipv6_events"].open_perf_buffer(print_ipv6_event) - while 1: + if args.dns: + b["dns_events"].open_perf_buffer(save_dns) + while True: try: b.perf_buffer_poll() except KeyboardInterrupt: diff --git a/tools/tcpconnect_example.txt b/tools/tcpconnect_example.txt index 7efac4a316c0..b8ad22d1326d 100644 --- a/tools/tcpconnect_example.txt +++ b/tools/tcpconnect_example.txt @@ -37,6 +37,23 @@ TIME(s) PID COMM IP SADDR DADDR DPORT The output shows
some periodic connections (or attempts) from a "local_agent" process to various other addresses. A few connections occur every minute. +The -d option tracks DNS responses and tries to associate each connection with +a previous DNS query issued before it. If a DNS response matching the IP +is found, it will be printed. If no match was found, "No DNS Query" is printed +in this column. Connections to 127.0.0.1 and ::1 are automatically associated with +"localhost". If the time between when the DNS response was received and a +connect call was traced exceeds 100ms, the tool will print the time delta +after the query name. See the www.domain.com entry below for an example. + +# ./tcpconnect -d +PID COMM IP SADDR DADDR DPORT QUERY +1543 amazon-ssm-a 4 10.66.75.54 176.32.119.67 443 ec2messages.us-west-1.amazonaws.com +1479 telnet 4 127.0.0.1 127.0.0.1 23 localhost +1469 curl 4 10.201.219.236 54.245.105.25 80 www.domain.com (123.342ms) +1469 curl 4 10.201.219.236 54.67.101.145 80 No DNS Query +1991 telnet 6 ::1 ::1 23 localhost +2015 ssh 6 fe80::2000:bff:fe82:3ac fe80::2000:bff:fe82:3ac 22 anotherhost.org + The -U option prints a UID column: @@ -79,8 +96,9 @@ For more details, see docs/special_filtering.md USAGE message: # ./tcpconnect -h + usage: tcpconnect.py [-h] [-t] [-p PID] [-P PORT] [-U] [-u UID] [-c] - [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] + [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] [-d] Trace TCP connects @@ -94,6 +112,8 @@ optional arguments: -c, --count count connects per src ip and dest ip/port --cgroupmap CGROUPMAP trace cgroups in this BPF map only + --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only + -d, --dns include likely DNS query associated with each connect examples: ./tcpconnect # trace all TCP connect()s