From 2da34267fcae4485f4e05a17521214749f6f0edd Mon Sep 17 00:00:00 2001 From: yonghong-song Date: Wed, 13 Jun 2018 06:12:22 -0700 Subject: [PATCH] generate indirect parameter assignment if arch uses syscall wrapper (#1816) Fix issue #1802. On x64, the following commit (in 4.17) changed the raw parameter passed to the syscall entry function from a list of parameters supplied in user space to a single `pt_regs *` parameter. Also in 4.17, the prefix of the x64 syscall entry function was changed from `sys_` to `__x64_sys_`. ``` commit fa697140f9a20119a9ec8fd7460cc4314fbdaff3 Author: Dominik Brodowski Date: Thu Apr 5 11:53:02 2018 +0200 syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems: Each syscall defines a stub which takes struct pt_regs as its only argument. It decodes just those parameters it needs, e.g: asmlinkage long sys_xyzzy(const struct pt_regs *regs) { return SyS_xyzzy(regs->di, regs->si, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. ... ``` In bcc, we support kprobe function signatures in the bpf program. The rewriter automatically generates the proper assignments to these parameters. With the above function signature change, the original method no longer works. This patch enhances the rewriter to generate two versions of the assignment code, guarded by CONFIG_ARCH_HAS_SYSCALL_WRAPPER. However, we need to identify whether a function will be attached to a syscall entry function at program load time, when the program has not yet been attached to any event. Since the prefix `kprobe__` is used for kprobe autoload, we can use `kprobe____x64_sys_` as the prefix to identify x64 syscall entry functions. To support other architectures or non-autoloading programs, the prefix `syscall__` is introduced to signal that the function is a syscall entry function. 
trace.py and other tools which use kprobe syscall entry functions are also modified to use the new interface so that they work properly with 4.17. Signed-off-by: Yonghong Song --- src/cc/frontends/clang/b_frontend_action.cc | 102 +++++++++++++++----- src/cc/frontends/clang/b_frontend_action.h | 5 + tools/execsnoop.py | 4 +- tools/killsnoop.py | 4 +- tools/mountsnoop.py | 8 +- tools/statsnoop.py | 8 +- tools/syncsnoop.py | 4 +- tools/trace.py | 48 +++++++-- 8 files changed, 138 insertions(+), 45 deletions(-) diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc index d7423f829e7e..a1e1bda8a05e 100644 --- a/src/cc/frontends/clang/b_frontend_action.cc +++ b/src/cc/frontends/clang/b_frontend_action.cc @@ -429,9 +429,83 @@ DiagnosticBuilder ProbeVisitor::error(SourceLocation loc, const char (&fmt)[N]) BTypeVisitor::BTypeVisitor(ASTContext &C, BFrontendAction &fe) : C(C), diag_(C.getDiagnostics()), fe_(fe), rewriter_(fe.rewriter()), out_(llvm::errs()) {} -bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) { +void BTypeVisitor::genParamDirectAssign(FunctionDecl *D, string& preamble, + const char **calling_conv_regs) { + for (size_t idx = 0; idx < fn_args_.size(); idx++) { + ParmVarDecl *arg = fn_args_[idx]; + + if (idx >= 1) { + // Move the args into a preamble section where the same params are + // declared and initialized from pt_regs. + // Todo: this init should be done only when the program requests it. 
+ string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange())); + arg->addAttr(UnavailableAttr::CreateImplicit(C, "ptregs")); + size_t d = idx - 1; + const char *reg = calling_conv_regs[d]; + preamble += " " + text + " = " + fn_args_[0]->getName().str() + "->" + + string(reg) + ";"; + } + } +} + +void BTypeVisitor::genParamIndirectAssign(FunctionDecl *D, string& preamble, + const char **calling_conv_regs) { + string new_ctx; + + for (size_t idx = 0; idx < fn_args_.size(); idx++) { + ParmVarDecl *arg = fn_args_[idx]; + + if (idx == 0) { + new_ctx = "__" + arg->getName().str(); + preamble += " struct pt_regs * " + new_ctx + " = " + + arg->getName().str() + "->" + + string(calling_conv_regs[0]) + ";"; + } else { + // Move the args into a preamble section where the same params are + // declared and initialized from pt_regs. + // Todo: this init should be done only when the program requests it. + string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange())); + size_t d = idx - 1; + const char *reg = calling_conv_regs[d]; + preamble += "\n " + text + ";"; + preamble += " bpf_probe_read(&" + arg->getName().str() + ", sizeof(" + + arg->getName().str() + "), &" + new_ctx + "->" + + string(reg) + ");"; + } + } +} + +void BTypeVisitor::rewriteFuncParam(FunctionDecl *D) { const char **calling_conv_regs = get_call_conv(); + string preamble = "{\n"; + if (D->param_size() > 1) { + // If function prefix is "syscall__" or "kprobe____x64_sys_", + // the function will attach to a kprobe syscall function. + // Guard parameter assiggnment with CONFIG_ARCH_HAS_SYSCALL_WRAPPER. + // For __x64_sys_* syscalls, this is always true, but we guard + // it in case of "syscall__" for other architectures. 
+ if (strncmp(D->getName().str().c_str(), "syscall__", 9) == 0 || + strncmp(D->getName().str().c_str(), "kprobe____x64_sys_", 18) == 0) { + preamble += "#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER\n"; + genParamIndirectAssign(D, preamble, calling_conv_regs); + preamble += "\n#else\n"; + genParamDirectAssign(D, preamble, calling_conv_regs); + preamble += "\n#endif\n"; + } else { + genParamDirectAssign(D, preamble, calling_conv_regs); + } + rewriter_.ReplaceText( + expansionRange(SourceRange(D->getParamDecl(0)->getLocEnd(), + D->getParamDecl(D->getNumParams() - 1)->getLocEnd())), + fn_args_[0]->getName()); + } + // for each trace argument, convert the variable from ptregs to something on stack + if (CompoundStmt *S = dyn_cast(D->getBody())) + rewriter_.ReplaceText(S->getLBracLoc(), 1, preamble); +} + +bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) { // put each non-static non-inline function decl in its own section, to be // extracted by the MemoryManager auto real_start_loc = rewriter_.getSourceMgr().getFileLoc(D->getLocStart()); @@ -447,37 +521,17 @@ bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) { "too many arguments, bcc only supports in-register parameters"); return false; } - // remember the arg names of the current function...first one is the ctx + fn_args_.clear(); - string preamble = "{"; for (auto arg_it = D->param_begin(); arg_it != D->param_end(); arg_it++) { - auto arg = *arg_it; + auto *arg = *arg_it; if (arg->getName() == "") { error(arg->getLocEnd(), "arguments to BPF program definition must be named"); return false; } fn_args_.push_back(arg); - if (fn_args_.size() > 1) { - // Move the args into a preamble section where the same params are - // declared and initialized from pt_regs. - // Todo: this init should be done only when the program requests it. 
- string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange())); - arg->addAttr(UnavailableAttr::CreateImplicit(C, "ptregs")); - size_t d = fn_args_.size() - 2; - const char *reg = calling_conv_regs[d]; - preamble += " " + text + " = " + fn_args_[0]->getName().str() + "->" + - string(reg) + ";"; - } - } - if (D->param_size() > 1) { - rewriter_.ReplaceText( - expansionRange(SourceRange(D->getParamDecl(0)->getLocEnd(), - D->getParamDecl(D->getNumParams() - 1)->getLocEnd())), - fn_args_[0]->getName()); } - // for each trace argument, convert the variable from ptregs to something on stack - if (CompoundStmt *S = dyn_cast(D->getBody())) - rewriter_.ReplaceText(S->getLBracLoc(), 1, preamble); + rewriteFuncParam(D); } else if (D->hasBody() && rewriter_.getSourceMgr().getFileID(real_start_loc) == rewriter_.getSourceMgr().getMainFileID()) { diff --git a/src/cc/frontends/clang/b_frontend_action.h b/src/cc/frontends/clang/b_frontend_action.h index f5c3b442f991..72498db2faef 100644 --- a/src/cc/frontends/clang/b_frontend_action.h +++ b/src/cc/frontends/clang/b_frontend_action.h @@ -70,6 +70,11 @@ class BTypeVisitor : public clang::RecursiveASTVisitor { private: clang::SourceRange expansionRange(clang::SourceRange range); bool checkFormatSpecifiers(const std::string& fmt, clang::SourceLocation loc); + void genParamDirectAssign(clang::FunctionDecl *D, std::string& preamble, + const char **calling_conv_regs); + void genParamIndirectAssign(clang::FunctionDecl *D, std::string& preamble, + const char **calling_conv_regs); + void rewriteFuncParam(clang::FunctionDecl *D); template clang::DiagnosticBuilder error(clang::SourceLocation loc, const char (&fmt)[N]); template diff --git a/tools/execsnoop.py b/tools/execsnoop.py index 9a66b393e37d..c75712adacad 100755 --- a/tools/execsnoop.py +++ b/tools/execsnoop.py @@ -98,7 +98,7 @@ return 0; } -int do_sys_execve(struct pt_regs *ctx, +int syscall__execve(struct pt_regs *ctx, const char __user *filename, const char __user 
*const __user *__argv, const char __user *const __user *__envp) @@ -146,7 +146,7 @@ # initialize BPF b = BPF(text=bpf_text) execve_fnname = b.get_syscall_fnname("execve") -b.attach_kprobe(event=execve_fnname, fn_name="do_sys_execve") +b.attach_kprobe(event=execve_fnname, fn_name="syscall__execve") b.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve") # header diff --git a/tools/killsnoop.py b/tools/killsnoop.py index 96f8044c009c..6022cd0848d3 100755 --- a/tools/killsnoop.py +++ b/tools/killsnoop.py @@ -60,7 +60,7 @@ BPF_HASH(infotmp, u32, struct val_t); BPF_PERF_OUTPUT(events); -int do_sys_kill(struct pt_regs *ctx, int tpid, int sig) +int syscall__kill(struct pt_regs *ctx, int tpid, int sig) { u32 pid = bpf_get_current_pid_tgid(); FILTER @@ -112,7 +112,7 @@ # initialize BPF b = BPF(text=bpf_text) kill_fnname = b.get_syscall_fnname("kill") -b.attach_kprobe(event=kill_fnname, fn_name="do_sys_kill") +b.attach_kprobe(event=kill_fnname, fn_name="syscall__kill") b.attach_kretprobe(event=kill_fnname, fn_name="do_ret_sys_kill") diff --git a/tools/mountsnoop.py b/tools/mountsnoop.py index b8b761b78007..2d0fa1a68935 100755 --- a/tools/mountsnoop.py +++ b/tools/mountsnoop.py @@ -86,7 +86,7 @@ BPF_PERF_OUTPUT(events); -int do_sys_mount(struct pt_regs *ctx, char __user *source, +int syscall__mount(struct pt_regs *ctx, char __user *source, char __user *target, char __user *type, unsigned long flags) { @@ -145,7 +145,7 @@ return 0; } -int do_sys_umount(struct pt_regs *ctx, char __user *target, int flags) +int syscall__umount(struct pt_regs *ctx, char __user *target, int flags) { struct data_t event = {}; struct task_struct *task; @@ -404,10 +404,10 @@ def main(): exit() b = bcc.BPF(text=bpf_text) mount_fnname = b.get_syscall_fnname("mount") - b.attach_kprobe(event=mount_fnname, fn_name="do_sys_mount") + b.attach_kprobe(event=mount_fnname, fn_name="syscall__mount") b.attach_kretprobe(event=mount_fnname, fn_name="do_ret_sys_mount") umount_fnname = 
b.get_syscall_fnname("umount") - b.attach_kprobe(event=umount_fnname, fn_name="do_sys_umount") + b.attach_kprobe(event=umount_fnname, fn_name="syscall__umount") b.attach_kretprobe(event=umount_fnname, fn_name="do_ret_sys_umount") b['events'].open_perf_buffer( functools.partial(print_event, mounts, umounts)) diff --git a/tools/statsnoop.py b/tools/statsnoop.py index 9ce7b93071a5..6fd8049c1e1b 100755 --- a/tools/statsnoop.py +++ b/tools/statsnoop.py @@ -61,7 +61,7 @@ BPF_HASH(infotmp, u32, struct val_t); BPF_PERF_OUTPUT(events); -int trace_entry(struct pt_regs *ctx, const char __user *filename) +int syscall__entry(struct pt_regs *ctx, const char __user *filename) { struct val_t val = {}; u32 pid = bpf_get_current_pid_tgid(); @@ -116,17 +116,17 @@ # actually exist before attaching the probes syscall_fnname = b.get_syscall_fnname("stat") if BPF.ksymname(syscall_fnname) != -1: - b.attach_kprobe(event=syscall_fnname, fn_name="trace_entry") + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry") b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return") syscall_fnname = b.get_syscall_fnname("statfs") if BPF.ksymname(syscall_fnname) != -1: - b.attach_kprobe(event=syscall_fnname, fn_name="trace_entry") + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry") b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return") syscall_fnname = b.get_syscall_fnname("newstat") if BPF.ksymname(syscall_fnname) != -1: - b.attach_kprobe(event=syscall_fnname, fn_name="trace_entry") + b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry") b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return") TASK_COMM_LEN = 16 # linux/sched.h diff --git a/tools/syncsnoop.py b/tools/syncsnoop.py index 6e35c175dfa9..ba3f1d3749a7 100755 --- a/tools/syncsnoop.py +++ b/tools/syncsnoop.py @@ -25,14 +25,14 @@ BPF_PERF_OUTPUT(events); -void do_sys_sync(void *ctx) { +void syscall__sync(void *ctx) { struct data_t data = {}; data.ts = bpf_ktime_get_ns() / 1000; 
events.perf_submit(ctx, &data, sizeof(data)); }; """) b.attach_kprobe(event=b.get_syscall_fnname("sync"), - fn_name="do_sys_sync") + fn_name="syscall__sync") class Data(ct.Structure): _fields_ = [ diff --git a/tools/trace.py b/tools/trace.py index 73f626cf5f96..549fb20dcead 100755 --- a/tools/trace.py +++ b/tools/trace.py @@ -64,6 +64,11 @@ def __init__(self, probe, string_size, kernel_stack, user_stack): self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_', self.probe_name) + # compiler can generate proper codes for function + # signatures with "syscall__" prefix + if self.is_syscall_kprobe: + self.probe_name = "syscall__" + self.probe_name[6:] + def __str__(self): return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type, self.library, self._display_function(), self.filter, @@ -154,6 +159,12 @@ def _parse_spec(self, spec): self.library = ':'.join(parts[1:-1]) self.function = parts[-1] + # only x64 syscalls needs checking, no other syscall wrapper yet. + self.is_syscall_kprobe = False + if self.probe_type == "p" and len(self.library) == 0 and \ + self.function[:10] == "__x64_sys_": + self.is_syscall_kprobe = True + def _find_usdt_probe(self): target = Probe.pid if Probe.pid and Probe.pid != -1 \ else Probe.tgid @@ -194,14 +205,32 @@ def _parse_action(self, action): if len(part) > 0: self.values.append(part) - aliases = { - "retval": "PT_REGS_RC(ctx)", + aliases_arg = { "arg1": "PT_REGS_PARM1(ctx)", "arg2": "PT_REGS_PARM2(ctx)", "arg3": "PT_REGS_PARM3(ctx)", "arg4": "PT_REGS_PARM4(ctx)", "arg5": "PT_REGS_PARM5(ctx)", "arg6": "PT_REGS_PARM6(ctx)", + } + + aliases_indarg = { + "arg1": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM1(ctx);" + " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})", + "arg2": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM2(ctx);" + " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})", + "arg3": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM3(ctx);" + " bpf_probe_read(&_val, sizeof(_val), 
&(PT_REGS_PARM3(_ctx))); _val;})", + "arg4": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM4(ctx);" + " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})", + "arg5": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM5(ctx);" + " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})", + "arg6": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM6(ctx);" + " bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})", + } + + aliases_common = { + "retval": "PT_REGS_RC(ctx)", "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)", "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)", "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)", @@ -229,13 +258,19 @@ def _generate_streq_function(self, string): return fname def _rewrite_expr(self, expr): - for alias, replacement in Probe.aliases.items(): + if self.is_syscall_kprobe: + for alias, replacement in Probe.aliases_indarg.items(): + expr = expr.replace(alias, replacement) + else: + for alias, replacement in Probe.aliases_arg.items(): # For USDT probes, we replace argN values with the # actual arguments for that probe obtained using # bpf_readarg_N macros emitted at BPF construction. - if alias.startswith("arg") and self.probe_type == "u": + if self.probe_type == "u": continue expr = expr.replace(alias, replacement) + for alias, replacement in Probe.aliases_common.items(): + expr = expr.replace(alias, replacement) matches = re.finditer('STRCMP\\(("[^"]+\\")', expr) for match in matches: string = match.group(1) @@ -362,9 +397,8 @@ def _generate_usdt_filter_read(self): text = "" if self.probe_type != "u": return text - for arg, _ in Probe.aliases.items(): - if not (arg.startswith("arg") and - (arg in self.filter)): + for arg, _ in Probe.aliases_arg.items(): + if not (arg in self.filter): continue arg_index = int(arg.replace("arg", "")) arg_ctype = self.usdt.get_probe_arg_ctype(