Skip to content

Commit

Permalink
Merge pull request #2894 from sumanthkorikkar/bcc-tools-fixes
Browse files Browse the repository at this point in the history
Bcc tools fixes for trace.py and argdist.py for bpf_probe_read_user.
  • Loading branch information
yonghong-song committed May 5, 2020
2 parents 0d93f24 + 09be5b5 commit 1b03643
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 76 deletions.
2 changes: 1 addition & 1 deletion src/cc/libbpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,7 @@ int bpf_attach_raw_tracepoint(int progfd, const char *tp_name)

bool bpf_has_kernel_btf(void)
{
return libbpf_find_vmlinux_btf_id("bpf_prog_put", 0);
return libbpf_find_vmlinux_btf_id("bpf_prog_put", 0) > 0;
}

int bpf_detach_kfunc(int prog_fd, char *func)
Expand Down
2 changes: 1 addition & 1 deletion src/python/bcc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from .libbcc import lib, bcc_symbol, bcc_symbol_option, bcc_stacktrace_build_id, _SYM_CB_TYPE
from .table import Table, PerfEventArray
from .perf import Perf
from .utils import get_online_cpus, printb, _assert_is_bytes, ArgString
from .utils import get_online_cpus, printb, _assert_is_bytes, ArgString, StrcmpRewrite
from .version import __version__
from .disassembler import disassemble_prog, decode_map

Expand Down
48 changes: 48 additions & 0 deletions src/python/bcc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import sys
import traceback
import warnings
import re

from .libbcc import lib

Expand Down Expand Up @@ -97,3 +98,50 @@ def _assert_is_bytes(arg):
return ArgString(arg).__bytes__()
return arg

class StrcmpRewrite(object):
    """Rewrite STRCMP("literal", arg) calls found in probe expressions.

    Every STRCMP call that matches is renamed to a generated ``streq_<N>``
    C helper. The helper reads ``sizeof(needle)`` bytes from ``arg`` with
    either bpf_probe_read or bpf_probe_read_user and compares the first
    ``sizeof(needle) - 1`` of them against the literal.
    """

    @staticmethod
    def _generate_streq_function(string, probe_read_func, streq_functions,
                                 probeid):
        """Append the C source of one ``streq_<probeid>`` helper.

        Arguments:
        string          -- C initializer for ``needle``, inserted verbatim
        probe_read_func -- name of the BPF helper used to read ``str``
        streq_functions -- C source of the helpers generated so far
        probeid         -- integer suffix that makes the helper name unique

        Returns a ``(helper_name, updated_streq_functions)`` tuple.
        """
        fname = "streq_%d" % probeid
        # The template text is emitted as-is into the generated BPF program.
        streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
char needle[] = %s;
char haystack[sizeof(needle)];
%s(&haystack, sizeof(haystack), (void *)str);
for (int i = 0; i < sizeof(needle) - 1; ++i) {
if (needle[i] != haystack[i]) {
return false;
}
}
return true;
}
""" % (fname, string, probe_read_func)
        return fname, streq_functions

    @staticmethod
    def rewrite_expr(expr, bin_cmp, is_user, probe_user_list, streq_functions,
                     probeid):
        """Replace each matching STRCMP call in ``expr`` with a streq helper.

        Arguments:
        expr            -- expression text that may contain STRCMP calls
        bin_cmp         -- if true, the literal is captured without its
                           surrounding quotes before being used as the
                           ``needle`` initializer
        is_user         -- truthy when every read must use
                           bpf_probe_read_user
        probe_user_list -- container of argument names that must be read
                           with bpf_probe_read_user
        streq_functions -- C source of the helpers generated so far
        probeid         -- suffix to use for the first generated helper

        Returns a dict with the keys ``"expr"`` (rewritten expression),
        ``"streq_functions"`` (accumulated helper source) and ``"probeid"``
        (next unused suffix).
        """
        # Whitespace between the literal and the comma is tolerated so that
        # 'STRCMP("x" , arg)' is rewritten as well.
        if bin_cmp:
            strcmp_re = r'STRCMP\("([^"]+)"\s*,(.+?)\)'
        else:
            strcmp_re = r'STRCMP\(("[^"]+")\s*,(.+?)\)'

        token_len = len("STRCMP")
        pieces = []
        copied_upto = 0
        for match in re.finditer(strcmp_re, expr):
            # If it is a user probe or the argument carries the user tag,
            # the char* has to be read with bpf_probe_read_user.
            if is_user or match.group(2).strip() in probe_user_list:
                probe_read_func = "bpf_probe_read_user"
            else:
                probe_read_func = "bpf_probe_read"
            fname, streq_functions = StrcmpRewrite._generate_streq_function(
                match.group(1), probe_read_func, streq_functions, probeid)
            probeid += 1
            # Rename the STRCMP token of *this* match. Replacing the first
            # textual "STRCMP" instead would hit the wrong call whenever an
            # earlier STRCMP(...) did not match the pattern.
            pieces.append(expr[copied_upto:match.start()])
            pieces.append(fname)
            copied_upto = match.start() + token_len
        pieces.append(expr[copied_upto:])

        return {
            "expr": "".join(pieces),
            "streq_functions": streq_functions,
            "probeid": probeid,
        }
47 changes: 21 additions & 26 deletions tools/argdist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

from bcc import BPF, USDT
from bcc import BPF, USDT, StrcmpRewrite
from time import sleep, strftime
import argparse
import re
Expand Down Expand Up @@ -41,6 +41,10 @@ def _parse_signature(self):
param_type = param[0:index + 1].strip()
param_name = param[index + 1:].strip()
self.param_types[param_name] = param_type
# Maintain list of user params. Then later decide to
# switch to bpf_probe_read or bpf_probe_read_user.
if "__user" in param_type.split():
self.probe_user_list.add(param_name)

def _generate_entry(self):
self.entry_probe_func = self.probe_func_name + "_entry"
Expand Down Expand Up @@ -182,6 +186,8 @@ def __init__(self, tool, type, specifier):
self.pid = tool.args.pid
self.cumulative = tool.args.cumulative or False
self.raw_spec = specifier
self.probe_user_list = set()
self.bin_cmp = False
self._validate_specifier()

spec_and_label = specifier.split('#')
Expand Down Expand Up @@ -250,32 +256,16 @@ def _enable_usdt_probe(self):
self.usdt_ctx.enable_probe(
self.function, self.probe_func_name)

def _generate_streq_function(self, string):
fname = "streq_%d" % Probe.streq_index
Probe.streq_index += 1
self.streq_functions += """
static inline bool %s(char const *ignored, char const *str) {
char needle[] = %s;
char haystack[sizeof(needle)];
bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
for (int i = 0; i < sizeof(needle) - 1; ++i) {
if (needle[i] != haystack[i]) {
return false;
}
}
return true;
}
""" % (fname, string)
return fname

def _substitute_exprs(self):
def repl(expr):
expr = self._substitute_aliases(expr)
matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
for match in matches:
string = match.group(1)
fname = self._generate_streq_function(string)
expr = expr.replace("STRCMP", fname, 1)
rdict = StrcmpRewrite.rewrite_expr(expr,
self.bin_cmp, self.library,
self.probe_user_list, self.streq_functions,
Probe.streq_index)
expr = rdict["expr"]
self.streq_functions = rdict["streq_functions"]
Probe.streq_index = rdict["probeid"]
return expr.replace("$retval", "PT_REGS_RC(ctx)")
for i in range(0, len(self.exprs)):
self.exprs[i] = repl(self.exprs[i])
Expand Down Expand Up @@ -305,9 +295,14 @@ def _generate_usdt_arg_assignment(self, i):
def _generate_field_assignment(self, i):
text = self._generate_usdt_arg_assignment(i)
if self._is_string(self.expr_types[i]):
return (text + " bpf_probe_read(&__key.v%d.s," +
if self.is_user or \
self.exprs[i] in self.probe_user_list:
probe_readfunc = "bpf_probe_read_user"
else:
probe_readfunc = "bpf_probe_read"
return (text + " %s(&__key.v%d.s," +
" sizeof(__key.v%d.s), (void *)%s);\n") % \
(i, i, self.exprs[i])
(probe_readfunc, i, i, self.exprs[i])
else:
return text + " __key.v%d = %s;\n" % \
(i, self.exprs[i])
Expand Down
9 changes: 9 additions & 0 deletions tools/argdist_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -449,3 +449,12 @@ argdist -I 'kernel/sched/sched.h' \
in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel
package. So this command needs to run at the kernel source tree root directory
so that the added header file can be found by the compiler.

argdist -C 'p::do_sys_open(int dfd, const char __user *filename, int flags,
umode_t mode):char*:filename:STRCMP("sample.txt", filename)'
Trace opens of the file "sample.txt". Note that the 'filename' passed to
do_sys_open is a char * user pointer, so for kprobes the parameter
'filename' should be tagged with __user (const char __user *filename).
This tag tells argdist whether 'filename' should be copied to the bpf
stack from userspace or from kernel space.
79 changes: 38 additions & 41 deletions tools/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# Copyright (C) 2016 Sasha Goldshtein.

from __future__ import print_function
from bcc import BPF, USDT
from bcc import BPF, USDT, StrcmpRewrite
from functools import partial
from time import sleep, strftime
import time
Expand Down Expand Up @@ -65,6 +65,7 @@ def __init__(self, probe, string_size, kernel_stack, user_stack,
self.string_size = string_size
self.kernel_stack = kernel_stack
self.user_stack = user_stack
self.probe_user_list = set()
Probe.probe_count += 1
self._parse_probe()
self.probe_num = Probe.probe_count
Expand Down Expand Up @@ -260,47 +261,33 @@ def _parse_action(self, action):
"$task" : "((struct task_struct *)bpf_get_current_task())"
}

def _generate_streq_function(self, string):
fname = "streq_%d" % Probe.streq_index
Probe.streq_index += 1
self.streq_functions += """
static inline bool %s(char const *ignored, uintptr_t str) {
char needle[] = %s;
char haystack[sizeof(needle)];
bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
for (int i = 0; i < sizeof(needle) - 1; ++i) {
if (needle[i] != haystack[i]) {
return false;
}
}
return true;
}
""" % (fname, string)
return fname

def _rewrite_expr(self, expr):
if self.is_syscall_kprobe:
for alias, replacement in Probe.aliases_indarg.items():
expr = expr.replace(alias, replacement)
else:
for alias, replacement in Probe.aliases_arg.items():
# For USDT probes, we replace argN values with the
# actual arguments for that probe obtained using
# bpf_readarg_N macros emitted at BPF construction.
if self.probe_type == "u":
continue
# Find the occurrences of any arg[1-6]@user. Use them later to
# identify bpf_probe_read_user
for matches in re.finditer(r'(arg[1-6])(@user)', expr):
if matches.group(1).strip() not in self.probe_user_list:
self.probe_user_list.add(matches.group(1).strip())
# Remove @user occurrences from arg before resolving to its
# corresponding aliases.
expr = re.sub(r'(arg[1-6])@user', r'\1', expr)
rdict = StrcmpRewrite.rewrite_expr(expr,
self.bin_cmp, self.library,
self.probe_user_list, self.streq_functions,
Probe.streq_index)
expr = rdict["expr"]
self.streq_functions = rdict["streq_functions"]
Probe.streq_index = rdict["probeid"]
alias_to_check = Probe.aliases_indarg \
if self.is_syscall_kprobe \
else Probe.aliases_arg
# For USDT probes, we replace argN values with the
# actual arguments for that probe obtained using
# bpf_readarg_N macros emitted at BPF construction.
if not self.probe_type == "u":
for alias, replacement in alias_to_check.items():
expr = expr.replace(alias, replacement)
for alias, replacement in Probe.aliases_common.items():
expr = expr.replace(alias, replacement)
if self.bin_cmp:
STRCMP_RE = 'STRCMP\\(\"([^"]+)\\"'
else:
STRCMP_RE = 'STRCMP\\(("[^"]+\\")'
matches = re.finditer(STRCMP_RE, expr)
for match in matches:
string = match.group(1)
fname = self._generate_streq_function(string)
expr = expr.replace("STRCMP", fname, 1)
return expr

p_type = {"u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
Expand Down Expand Up @@ -412,14 +399,24 @@ def _generate_field_assign(self, idx):
text = (" %s %s = 0;\n" +
" bpf_usdt_readarg(%s, ctx, &%s);\n") \
% (arg_ctype, expr, expr[3], expr)

probe_read_func = "bpf_probe_read"
if field_type == "s":
if self.library:
probe_read_func = "bpf_probe_read_user"
else:
alias_to_check = Probe.aliases_indarg \
if self.is_syscall_kprobe \
else Probe.aliases_arg
for arg, alias in alias_to_check.items():
if alias == expr and arg in self.probe_user_list:
probe_read_func = "bpf_probe_read_user"
break
return text + """
if (%s != 0) {
void *__tmp = (void *)%s;
bpf_probe_read(&__data.v%d, sizeof(__data.v%d), __tmp);
%s(&__data.v%d, sizeof(__data.v%d), __tmp);
}
""" % (expr, expr, idx, idx)
""" % (expr, expr, probe_read_func, idx, idx)
if field_type in Probe.fmt_types:
return text + " __data.v%d = (%s)%s;\n" % \
(idx, Probe.c_type[field_type], expr)
Expand Down
17 changes: 10 additions & 7 deletions tools/trace_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Event message filter is useful while you only interesting the specific event.
For example, a program may open thousands of files while you only want to see
the "temp" file and print the stack.

# trace 'do_sys_open "%s", arg2' -UK -f temp
# trace 'do_sys_open "%s", arg2@user' -UK -f temp
PID TID COMM FUNC -
9557 9557 a.out do_sys_open temp.1
do_sys_open+0x1 [kernel]
Expand All @@ -71,7 +71,7 @@ PID TID COMM FUNC -

The process name filter is ported from tools/opensnoop

# trace 'do_sys_open "%s", arg2' -UK -n out
# trace 'do_sys_open "%s", arg2@user' -UK -n out
PID TID COMM FUNC -
9557 9557 a.out do_sys_open temp.1
do_sys_open+0x1 [kernel]
Expand Down Expand Up @@ -241,7 +241,7 @@ so it always includes this header file.
As a final example, let's trace open syscalls for a specific process. By
default, tracing is system-wide, but the -p switch overrides this:

# trace -p 2740 'do_sys_open "%s", arg2' -T
# trace -p 2740 'do_sys_open "%s", arg2@user' -T
TIME PID COMM FUNC -
05:36:16 15872 ls do_sys_open /etc/ld.so.cache
05:36:16 15872 ls do_sys_open /lib64/libselinux.so.1
Expand Down Expand Up @@ -335,11 +335,14 @@ EXAMPLES:

trace do_sys_open
Trace the open syscall and print a default trace message when entered
trace 'do_sys_open "%s", arg2'
Trace the open syscall and print the filename being opened
trace 'do_sys_open "%s", arg2' -n main
trace 'do_sys_open "%s", arg2@user'
Trace the open syscall and print the filename being opened. @user is
added to arg2 in kprobes to ensure that the char * is copied from
userspace to the bpf stack. If it is not specified, the previous
behaviour is kept.
trace 'do_sys_open "%s", arg2@user' -n main
Trace the open syscall and only print events from processes whose names contain "main"
trace 'do_sys_open "%s", arg2' -f config
trace 'do_sys_open "%s", arg2@user' -f config
Trace the open syscall and print the filename being opened filtered by "config"
trace 'sys_read (arg3 > 20000) "read %d bytes", arg3'
Trace the read syscall and print a message for reads >20000 bytes
Expand Down

0 comments on commit 1b03643

Please sign in to comment.