From 4f88a9401357d7b75e917abd994aa6ea97dda4d3 Mon Sep 17 00:00:00 2001 From: Brendan Gregg Date: Fri, 22 Jul 2016 17:11:51 -0700 Subject: [PATCH] USDT Python API and example (#624) * Python USDT API Code from @vmg * Basic USDT example * retire procstat.py * improve/fix USDT exceptions --- examples/tracing/nodejs_http_server.py | 54 +++ src/cc/bcc_usdt.h | 2 +- src/cc/usdt.cc | 8 +- src/lua/bcc/usdt.lua | 7 +- src/python/bcc/__init__.py | 10 +- src/python/bcc/libbcc.py | 20 ++ src/python/bcc/procstat.py | 125 ------- src/python/bcc/usdt.py | 475 ++----------------------- tools/argdist.py | 79 ++-- tools/trace.py | 2 +- 10 files changed, 150 insertions(+), 632 deletions(-) create mode 100755 examples/tracing/nodejs_http_server.py delete mode 100644 src/python/bcc/procstat.py diff --git a/examples/tracing/nodejs_http_server.py b/examples/tracing/nodejs_http_server.py new file mode 100755 index 000000000000..271731b79f8b --- /dev/null +++ b/examples/tracing/nodejs_http_server.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# +# nodejs_http_server Basic example of node.js USDT tracing. +# For Linux, uses BCC, BPF. Embedded C. +# +# USAGE: nodejs_http_server PID +# +# Copyright 2016 Netflix, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") + +from __future__ import print_function +from bcc import BPF, USDT +import sys + +if len(sys.argv) < 2: + print("USAGE: nodejs_http_server PID") + exit() +pid = sys.argv[1] +debug = 0 + +# load BPF program +bpf_text = """ +#include +int do_trace(struct pt_regs *ctx) { + uint64_t addr; + char path[128]; + bpf_usdt_readarg(6, ctx, &addr); + bpf_probe_read(&path, sizeof(path), (void *)addr); + bpf_trace_printk("path:%s\\n", path); + return 0; +}; +""" + +# enable USDT probe from given PID +u = USDT(pid=int(pid)) +u.enable_probe(probe="http__server__request", fn_name="do_trace") +if debug: + print(u.get_text()) + print(bpf_text) + +# initialize BPF +b = BPF(text=bpf_text, usdt=u) + +# header +print("%-18s %-16s %-6s %s" % ("TIME(s)", "COMM", "PID", "ARGS")) + +# format output +while 1: + try: + (task, pid, cpu, flags, ts, msg) = b.trace_fields() + except ValueError: + print("value error") + continue + print("%-18.9f %-16s %-6d %s" % (ts, task, pid, msg)) diff --git a/src/cc/bcc_usdt.h b/src/cc/bcc_usdt.h index e303da80adac..7148b105fcdd 100644 --- a/src/cc/bcc_usdt.h +++ b/src/cc/bcc_usdt.h @@ -27,7 +27,7 @@ void *bcc_usdt_new_frompath(const char *path); void bcc_usdt_close(void *usdt); int bcc_usdt_enable_probe(void *, const char *, const char *); -char *bcc_usdt_genargs(void *); +const char *bcc_usdt_genargs(void *); typedef void (*bcc_usdt_uprobe_cb)(const char *, const char *, uint64_t, int); void bcc_usdt_foreach_uprobe(void *usdt, bcc_usdt_uprobe_cb callback); diff --git a/src/cc/usdt.cc b/src/cc/usdt.cc index c2b945994272..a469eead2161 100644 --- a/src/cc/usdt.cc +++ b/src/cc/usdt.cc @@ -319,12 +319,16 @@ int bcc_usdt_enable_probe(void *usdt, const char *probe_name, return ctx->enable_probe(probe_name, fn_name) ? 0 : -1; } -char *bcc_usdt_genargs(void *usdt) { +const char *bcc_usdt_genargs(void *usdt) { + static std::string storage_; + USDT::Context *ctx = static_cast(usdt); std::ostringstream stream; if (!ctx->generate_usdt_args(stream)) return nullptr; - return strdup(stream.str().c_str()); + + storage_ = stream.str(); + return storage_.c_str(); } void bcc_usdt_foreach_uprobe(void *usdt, bcc_usdt_uprobe_cb callback) { diff --git a/src/lua/bcc/usdt.lua b/src/lua/bcc/usdt.lua index fefe7c71f4c1..e9788da0714b 100644 --- a/src/lua/bcc/usdt.lua +++ b/src/lua/bcc/usdt.lua @@ -14,8 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. ]] local ffi = require("ffi") -ffi.cdef "void free(void *ptr);" - local libbcc = require("bcc.libbcc") local Usdt = class("USDT") @@ -56,10 +54,7 @@ end function Usdt:_get_text() local argc = libbcc.bcc_usdt_genargs(self.context) assert(argc ~= nil) - - local text = ffi.string(argc) - ffi.C.free(argc) - return text + return ffi.string(argc) end function Usdt:_attach_uprobes(bpf) diff --git a/src/python/bcc/__init__.py b/src/python/bcc/__init__.py index 3563e7d0433a..8a9b9078d2f0 100644 --- a/src/python/bcc/__init__.py +++ b/src/python/bcc/__init__.py @@ -25,7 +25,6 @@ basestring = (unicode if sys.version_info[0] < 3 else str) from .libbcc import lib, _CB_TYPE, bcc_symbol -from .procstat import ProcStat, ProcUtils from .table import Table from .tracepoint import Tracepoint from .perf import Perf @@ -117,7 +116,8 @@ def _find_file(filename): raise Exception("Could not find file %s" % filename) return filename - def __init__(self, src_file="", hdr_file="", text=None, cb=None, debug=0, cflags=[]): + def __init__(self, src_file="", hdr_file="", text=None, cb=None, debug=0, + cflags=[], usdt=None): """Create a a new BPF module with the given source code. Note: @@ -147,6 +147,8 @@ def __init__(self, src_file="", hdr_file="", text=None, cb=None, debug=0, cflags self.tables = {} cflags_array = (ct.c_char_p * len(cflags))() for i, s in enumerate(cflags): cflags_array[i] = s.encode("ascii") + if usdt and text: text = usdt.get_text() + text + if text: self.module = lib.bpf_module_create_c_from_string(text.encode("ascii"), self.debug, cflags_array, len(cflags_array)) @@ -163,6 +165,8 @@ def __init__(self, src_file="", hdr_file="", text=None, cb=None, debug=0, cflags if not self.module: raise Exception("Failed to compile BPF module %s" % src_file) + if usdt: usdt.attach_uprobes(self) + # If any "kprobe__" or "tracepoint__" prefixed functions were defined, # they will be loaded and attached here. self._trace_autoload() @@ -785,4 +789,4 @@ def cleanup(self): self.tracefile.close() -from .usdt import USDTReader +from .usdt import USDT diff --git a/src/python/bcc/libbcc.py b/src/python/bcc/libbcc.py index 33d0f1685f9b..c847c00da92f 100644 --- a/src/python/bcc/libbcc.py +++ b/src/python/bcc/libbcc.py @@ -135,3 +135,23 @@ class bcc_symbol(ct.Structure): lib.bcc_symcache_refresh.restype = None lib.bcc_symcache_refresh.argtypes = [ct.c_void_p] + +lib.bcc_usdt_new_frompid.restype = ct.c_void_p +lib.bcc_usdt_new_frompid.argtypes = [ct.c_int] + +lib.bcc_usdt_new_frompath.restype = ct.c_void_p +lib.bcc_usdt_new_frompath.argtypes = [ct.c_char_p] + +lib.bcc_usdt_close.restype = None +lib.bcc_usdt_close.argtypes = [ct.c_void_p] + +lib.bcc_usdt_enable_probe.restype = ct.c_int +lib.bcc_usdt_enable_probe.argtypes = [ct.c_void_p, ct.c_char_p, ct.c_char_p] + +lib.bcc_usdt_genargs.restype = ct.c_char_p +lib.bcc_usdt_genargs.argtypes = [ct.c_void_p] + +_USDT_CB = ct.CFUNCTYPE(None, ct.c_char_p, ct.c_char_p, ct.c_ulonglong, ct.c_int) + +lib.bcc_usdt_foreach_uprobe.restype = None +lib.bcc_usdt_foreach_uprobe.argtypes = [ct.c_void_p, _USDT_CB] diff --git a/src/python/bcc/procstat.py b/src/python/bcc/procstat.py deleted file mode 100644 index f00993d1d975..000000000000 --- a/src/python/bcc/procstat.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2016 Sasha Goldshtein -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -class ProcStat(object): - def __init__(self, pid): - self.pid = pid - self.exe = self._get_exe() - self.start_time = self._get_start_time() - - def is_stale(self): - return self.exe != self._get_exe() or \ - self.start_time != self._get_start_time() - - def _get_exe(self): - return os.popen("readlink -f /proc/%d/exe" % self.pid).read() - - def _get_start_time(self): - return os.popen("cut -d' ' -f 22 /proc/%d/stat" % - self.pid).read() - -class ProcUtils(object): - @staticmethod - def get_load_address(pid, bin_path): - """ - get_load_address(pid, bin_path) - - Returns the address at which the specified module is loaded - in the specified process. The module path must match exactly - the file system path, not a symbolic link. - """ - with open("/proc/%d/maps" % pid) as m: - maps = m.readlines() - addrs = map(lambda l: l.split('-')[0], - filter(lambda l: bin_path in l, maps) - ) - if len(addrs) == 0: - raise ValueError("lib %s not loaded in pid %d" - % (bin_path, pid)) - return int(addrs[0], 16) - - @staticmethod - def get_modules(pid): - """ - get_modules(pid) - - Returns a list of all the modules loaded into the specified - process. Modules are enumerated by looking at /proc/$PID/maps - and returning the module name for regions that contain - executable code. - """ - with open("/proc/%d/maps" % pid) as f: - maps = f.readlines() - modules = [] - for line in maps: - parts = line.strip().split() - if len(parts) < 6: - continue - if parts[5][0] == '[' or not 'x' in parts[1]: - continue - modules.append(parts[5]) - return modules - - @staticmethod - def is_shared_object(bin_path): - """ - is_shared_object(bin_path) - - Returns whether the specified binary is a shared object, rather - than an executable. If it is neither, an error is raised. - """ - mime_type = os.popen("file --mime-type -b %s" % bin_path - ).read().strip() - if mime_type == "application/x-sharedlib": - return True - if mime_type == "application/x-executable": - return False - raise ValueError("invalid mime type %s for binary %s" % - (mime_type, bin_path)) - - @staticmethod - def traverse_symlink(path): - """Returns the actual path behind the specified symlink.""" - return os.popen("readlink -f %s" % path).read().strip() - - @staticmethod - def which(bin_path): - """ - which(bin_path) - - Traverses the PATH environment variable, looking for the first - directory that contains an executable file named bin_path, and - returns the full path to that file, or None if no such file - can be found. This is meant to replace invocations of the - "which" shell utility, which doesn't have portable semantics - for skipping aliases. - """ - # Source: http://stackoverflow.com/a/377028 - def is_exe(fpath): - return os.path.isfile(fpath) and \ - os.access(fpath, os.X_OK) - - fpath, fname = os.path.split(bin_path) - if fpath: - if is_exe(bin_path): - return bin_path - else: - for path in os.environ["PATH"].split(os.pathsep): - path = path.strip('"') - exe_file = os.path.join(path, bin_path) - if is_exe(exe_file): - return exe_file - return None diff --git a/src/python/bcc/usdt.py b/src/python/bcc/usdt.py index b4e315174a9d..98d87b85827a 100644 --- a/src/python/bcc/usdt.py +++ b/src/python/bcc/usdt.py @@ -12,447 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import struct -import re - -from . import BPF -from . import ProcStat, ProcUtils - -class USDTArgument(object): - def __init__(self, size, is_signed, location, - register=None, constant=None, deref_offset=None, - deref_name=None): - self.size = size - self.is_signed = is_signed - self.location = location - self.register = register - self.constant = constant - self.deref_offset = deref_offset - self.deref_name = deref_name - - def _normalize_register(self): - normalized = self.register - if normalized is None: - return None - if normalized.startswith('%'): - normalized = normalized[1:] - if normalized in USDTArgument.translations: - normalized = USDTArgument.translations[normalized] - return normalized - - translations = { - "rax": "ax", "rbx": "bx", "rcx": "cx", "rdx": "dx", - "rdi": "di", "rsi": "si", "rbp": "bp", "rsp": "sp", - "rip": "ip", "eax": "ax", "ebx": "bx", "ecx": "cx", - "edx": "dx", "edi": "di", "esi": "si", "ebp": "bp", - "esp": "sp", "eip": "ip", "al": "ax", "bl": "bx", - "cl": "cx", "dl": "dx" - } - - def generate_assign_to_local(self, local_name, pid=None): - """ - generate_assign_to_local(local_name, pid=None) - - Generates an assignment statement that initializes a local - variable with the value of this argument. Assumes that the - struct pt_regs pointer is called 'ctx', and accesses registers - from that pointer. The local variable must already be declared - by the caller. Use get_type() to get the proper type for that - declaration. - - The pid parameter is intended for use when the argument depends - on an address that is process-specific. This only happens for - arguments that are offsets from globals -- the load address for - the global depends on the process. If no pid is specified and - the argument depends on an address that is process-specific, - an error is raised. - - Example output: - local1 = (u64)ctx->di; - { - u64 __tmp; - bpf_probe_read(&__tmp, sizeof(__tmp), - (void *)(ctx->bp - 8)); - bpf_probe_read(&local2, sizeof(local2), - (void *)__tmp); - } - """ - normalized_reg = self._normalize_register() - if self.constant is not None: - # Simplest case, it's just a constant - return "%s = %d;" % (local_name, self.constant) - if self.deref_offset is None: - # Simple read from the specified register - return "%s = (%s)ctx->%s;" % \ - (local_name, self.get_type(), normalized_reg) - # Note that the cast to a smaller type should grab the - # relevant part of the register anyway, if we're dealing - # with 32/16/8-bit registers like ecx, dx, al, etc. - - if self.deref_offset is not None and self.deref_name is None: - # Add deref_offset to register value and bpf_probe_read - # from the resulting address - return \ -"""{ - u64 __temp = ctx->%s + (%d); - bpf_probe_read(&%s, sizeof(%s), (void *)__temp); -} """ % (normalized_reg, self.deref_offset, - local_name, local_name) - - # Final case: dereference global, need to find address of global - # with the provided name and then potentially add deref_offset - # and bpf_probe_read the result. - return \ -"""{ - u64 __temp = 0x%x + %d; - bpf_probe_read(&%s, sizeof(%s), (void *)__temp); -} """ % (self._get_global_address(pid), self.deref_offset, - local_name, local_name) - - def _get_global_address(self, pid=None): - # If this is a library, we need to find its load address in the - # specified process and then add the global symbol's offset. - # If this is an executable, the global symbol's address doesn't - # depend on the pid. - bin_path = self.location.probe.bin_path - offset = self._get_global_offset(bin_path) - if ProcUtils.is_shared_object(self.location.probe.bin_path): - if pid is None: - raise ValueError("pid is required for " + - "argument '%s'" % str(self)) - load_address = ProcUtils.get_load_address(pid, bin_path) - return load_address + offset - else: - return offset - - def _get_global_offset(self, bin_path): - with os.popen("objdump -tT %s | grep '\\s%s$'" % - (bin_path, self.deref_name)) as f: - lines = f.readlines() - for line in lines: - parts = line.split() - if parts[5] != self.deref_name: - continue - return int(parts[0], 16) - raise ValueError("can't find global symbol %s" % - self.deref_name) - - def get_type(self): - result_type = None - if self.size == 1: - result_type = "char" - elif self.size == 2: - result_type = "short" - elif self.size == 4: - result_type = "int" - elif self.size == 8: - result_type = "long" - - if result_type is None: - raise ValueError("arguments of size %d are not " + - "currently supported" % self.size) - - if not self.is_signed: - result_type = "unsigned " + result_type - - return result_type - - def __str__(self): - prefix = "%d %s bytes @ " % (self.size, - " signed" if self.is_signed else "unsigned") - if self.constant is not None: - return prefix + "constant %d" % self.constant - if self.deref_offset is None: - return prefix + "register " + self.register - if self.deref_offset is not None and self.deref_name is None: - return prefix + "%d(%s)" % (self.deref_offset, - self.register) - return prefix + "%d from %s global" % (self.deref_offset, - self.deref_name) - -class USDTProbeLocation(object): - def __init__(self, address, args, probe): - self.address = address - self.raw_args = args - self.probe = probe - self.args = [] - self._parse_args() - - def generate_usdt_assignments(self, prefix="arg", pid=None): - text = "" - for i, arg in enumerate(self.args, 1): - text += (" "*16) + \ - arg.generate_assign_to_local( - "%s%d" % (prefix, i), pid) + "\n" - return text - - def _parse_args(self): - for arg in self.raw_args.split(): - self._parse_arg(arg.strip()) - - def _parse_arg(self, arg): - qregs = ["%rax", "%rbx", "%rcx", "%rdx", "%rdi", "%rsi", - "%rbp", "%rsp", "%rip", "%r8", "%r9", "%r10", "%r11", - "%r12", "%r13", "%r14", "%r15"] - dregs = ["%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi", - "%ebp", "%esp", "%eip"] - wregs = ["%ax", "%bx", "%cx", "%dx", "%di", "%si", - "%bp", "%sp", "%ip"] - bregs = ["%al", "%bl", "%cl", "%dl"] - - any_reg = "(" + "|".join(qregs + dregs + wregs + bregs) + ")" - - # -4@$0, 8@$1234 - m = re.match(r'(\-?)(\d+)@\$(\-?)(\d+)', arg) - if m is not None: - sign = -1 if len(m.group(3)) > 0 else 1 - self.args.append(USDTArgument( - int(m.group(2)), - m.group(1) == '-', - self, - constant=sign*int(m.group(4)) - )) - return - - # %rdi, %rax, %rsi - m = re.match(any_reg, arg) - if m is not None: - if arg in qregs: - size = 8 - elif arg in dregs: - size = 4 - elif arg in wregs: - size = 2 - elif arg in bregs: - size = 1 - self.args.append(USDTArgument( - size, False, self, register=arg - )) - return - - # -8@%rbx, 4@%r12 - m = re.match(r'(\-?)(\d+)@' + any_reg, arg) - if m is not None: - self.args.append(USDTArgument( - int(m.group(2)), # Size (in bytes) - m.group(1) == '-', # Signed - self, - register=m.group(3) - )) - return - - # 8@-8(%rbp), 4@(%rax) - m = re.match(r'(\-?)(\d+)@(\-?)(\d*)\(' + any_reg + r'\)', arg) - if m is not None: - deref_offset = int(m.group(4)) if len(m.group(4)) > 0 \ - else 0 - if m.group(3) == '-': - deref_offset = -deref_offset - self.args.append(USDTArgument( - int(m.group(2)), m.group(1) == '-', self, - register=m.group(5), deref_offset=deref_offset - )) - return - - # -4@global_max_action(%rip) - m = re.match(r'(\-?)(\d+)@(\w+)\(%rip\)', arg) - if m is not None: - self.args.append(USDTArgument( - int(m.group(2)), m.group(1) == '-', self, - register="%rip", deref_name=m.group(3), - deref_offset=0 - )) - return - - # 8@24+mp_(@rip) - m = re.match(r'(\-?)(\d+)@(\-?)(\d+)\+(\w+)\(%rip\)', arg) - if m is not None: - deref_offset = int(m.group(4)) - if m.group(3) == '-': - deref_offset = -deref_offset - self.args.append(USDTArgument( - int(m.group(2)), m.group(1) == '-', self, - register="%rip", deref_offset=deref_offset, - deref_name=m.group(5) - )) - return - - raise ValueError("unrecognized argument format: '%s'" % arg) - - -class USDTProbe(object): - def __init__(self, bin_path, provider, name, semaphore): - self.bin_path = bin_path - self.provider = provider - self.name = name - self.semaphore = semaphore - self.enabled_procs = {} - self.proc_semas = {} - self.locations = [] - - def add_location(self, location, arguments): - self.locations.append(USDTProbeLocation( - location, arguments, self)) - - def need_enable(self): - """ - Returns whether this probe needs to be enabled in each - process that uses it. Probes that must be enabled can't be - traced without specifying a specific pid. - """ - return self.semaphore != 0 - - def enable(self, pid): - """Enables this probe in the specified process.""" - self._add_to_semaphore(pid, +1) - self.enabled_procs[pid] = ProcStat(pid) - - def disable(self, pid): - """Disables the probe in the specified process.""" - if pid not in self.enabled_procs: - raise ValueError("probe wasn't enabled in this process") - # Because of the possibility of pid wrap, it's extremely - # important to verify that we are still dealing with the same - # process. Otherwise, we are overwriting random memory in some - # other process :-) - if not self.enabled_procs[pid].is_stale(): - self._add_to_semaphore(pid, -1) - del(self.enabled_procs[pid]) - - def get_arg_types(self): - """ - Returns the argument types used by this probe. Different probe - locations might use different argument types, e.g. signed i32 - vs. unsigned i64. We should take the largest type, and the - sign really doesn't matter that much. - """ - arg_types = [] - for i in range(len(self.locations[0].args)): - max_size_loc = max(self.locations, key=lambda loc: - loc.args[i].size) - arg_types.append(max_size_loc.args[i].get_type()) - return arg_types - - def generate_usdt_thunks(self, name_prefix, thunk_names): - text = "" - for i in range(len(self.locations)): - thunk_name = "%s_thunk_%d" % (name_prefix, i) - thunk_names.append(thunk_name) - text += """ -int %s(struct pt_regs *ctx) { - return %s(ctx, %d); -} """ % (thunk_name, name_prefix, i) - return text - - def generate_usdt_cases(self, pid=None): - text = "" - for i, arg_type in enumerate(self.get_arg_types(), 1): - text += " %s arg%d = 0;\n" % (arg_type, i) - for i, location in enumerate(self.locations): - assignments = location.generate_usdt_assignments( - pid=pid) - text += \ -""" - if (__loc_id == %d) { -%s - } \n""" % (i, assignments) - return text - - def _ensure_proc_sema(self, pid): - if pid in self.proc_semas: - return self.proc_semas[pid] - - if ProcUtils.is_shared_object(self.bin_path): - # Semaphores declared in shared objects are relative - # to that shared object's load address - sema_addr = ProcUtils.get_load_address( - pid, self.bin_path) + self.semaphore - else: - sema_addr = self.semaphore # executable, absolute - self.proc_semas[pid] = sema_addr - return sema_addr - - def _add_to_semaphore(self, pid, val): - sema_addr = self._ensure_proc_sema(pid) - with open("/proc/%d/mem" % pid, "r+b") as fd: - fd.seek(sema_addr, 0) - prev = struct.unpack("H", fd.read(2))[0] - fd.seek(sema_addr, 0) - fd.write(struct.pack("H", prev + val)) - - def __str__(self): - return "%s %s:%s" % (self.bin_path, self.provider, self.name) - - def display_verbose(self): - text = str(self) + " [sema 0x%x]\n" % self.semaphore - for location in self.locations: - text += " location 0x%x raw args: %s\n" % \ - (location.address, location.raw_args) - for arg in location.args: - text += " %s\n" % str(arg) - return text - -class USDTReader(object): - def __init__(self, bin_path="", pid=-1): - """ - __init__(bin_path="", pid=-1) - - Reads all the probes from the specified library, executable, - or process. If a pid is specified, all the libraries (including - the executable) are searched for probes. After initialization - completes, the found probes are in the 'probes' property. - """ - self.probes = [] - if pid != -1: - for mod in ProcUtils.get_modules(pid): - self._add_probes(mod) - elif len(bin_path) != 0: - self._add_probes(bin_path) - else: - raise ValueError("pid or bin_path is required") - - def _add_probes(self, bin_path): - if not os.path.isfile(bin_path): - attempt1 = ProcUtils.which(bin_path) - if attempt1 is None or not os.path.isfile(attempt1): - attempt2 = BPF.find_library(bin_path) - if attempt2 is None or \ - not os.path.isfile(attempt2): - raise ValueError("can't find %s" - % bin_path) - else: - bin_path = attempt2 - else: - bin_path = attempt1 - bin_path = ProcUtils.traverse_symlink(bin_path) - - with os.popen("readelf -n %s 2>/dev/null" % bin_path) as child: - notes = child.read() - for match in re.finditer(r'stapsdt.*?NT_STAPSDT.*?Provider: ' + - r'(\w+).*?Name: (\w+).*?Location: (\w+), Base: ' + - r'(\w+), Semaphore: (\w+).*?Arguments: ([^\n]*)', - notes, re.DOTALL): - self._add_or_merge_probe( - bin_path, match.group(1), match.group(2), - int(match.group(3), 16), - int(match.group(5), 16), match.group(6) - ) - # Note that BPF.attach_uprobe takes care of subtracting - # the load address for that bin, so we can report the actual - # address that appears in the note - - def _add_or_merge_probe(self, bin_path, provider, name, location, - semaphore, arguments): - matches = filter(lambda p: p.provider == provider and \ - p.name == name, self.probes) - if len(matches) > 0: - probe = matches[0] - else: - probe = USDTProbe(bin_path, provider, name, semaphore) - self.probes.append(probe) - probe.add_location(location, arguments) - - def __str__(self): - return "\n".join(map(USDTProbe.display_verbose, self.probes)) - +from .libbcc import lib, _USDT_CB + +class USDT(object): + def __init__(self, pid=None, path=None): + if pid: + self.pid = pid + self.context = lib.bcc_usdt_new_frompid(pid) + if self.context == None: + raise Exception("USDT failed to instrument PID %d" % pid) + elif path: + self.path = path + self.context = lib.bcc_usdt_new_frompath(path) + if self.context == None: + raise Exception("USDT failed to instrument path %s" % path) + + def enable_probe(self, probe, fn_name): + if lib.bcc_usdt_enable_probe(self.context, probe, fn_name) != 0: + raise Exception("failed to enable probe '%s'" % probe) + + def get_text(self): + return lib.bcc_usdt_genargs(self.context) + + def attach_uprobes(self, bpf): + probes = [] + def _add_probe(binpath, fn_name, addr, pid): + probes.append((binpath, fn_name, addr, pid)) + + lib.bcc_usdt_foreach_uprobe(self.context, _USDT_CB(_add_probe)) + + for (binpath, fn_name, addr, pid) in probes: + bpf.attach_uprobe(name=binpath, fn_name=fn_name, addr=addr, pid=pid) diff --git a/tools/argdist.py b/tools/argdist.py index 9c5e6d57b9a4..36d0425dc473 100755 --- a/tools/argdist.py +++ b/tools/argdist.py @@ -12,7 +12,7 @@ # Licensed under the Apache License, Version 2.0 (the "License") # Copyright (C) 2016 Sasha Goldshtein. -from bcc import BPF, Tracepoint, Perf, ProcUtils, USDTReader +from bcc import BPF, Tracepoint, Perf, USDT from time import sleep, strftime import argparse import re @@ -175,8 +175,8 @@ def _parse_exprs(self, exprs): self._bail("no exprs specified") self.exprs = exprs.split(',') - def __init__(self, type, specifier, pid): - self.pid = pid + def __init__(self, bpf, type, specifier): + self.pid = bpf.args.pid self.raw_spec = specifier self._validate_specifier() @@ -198,8 +198,10 @@ def __init__(self, type, specifier, pid): self.function = "perf_trace_" + self.function elif self.probe_type == "u": self.library = parts[1] - self._find_usdt_probe() - self._enable_usdt_probe() + self.probe_func_name = "%s_probe%d" % \ + (self.function, Probe.next_probe_index) + bpf.enable_usdt_probe(self.function, + fn_name=self.probe_func_name) else: self.library = parts[1] self.is_user = len(self.library) > 0 @@ -240,26 +242,8 @@ def check(expr): (self.function, Probe.next_probe_index) Probe.next_probe_index += 1 - def _enable_usdt_probe(self): - if self.usdt.need_enable(): - if self.pid is None: - self._bail("probe needs pid to enable") - self.usdt.enable(self.pid) - - def _disable_usdt_probe(self): - if self.probe_type == "u" and self.usdt.need_enable(): - self.usdt.disable(self.pid) - def close(self): - self._disable_usdt_probe() - - def _find_usdt_probe(self): - reader = USDTReader(bin_path=self.library) - for probe in reader.probes: - if probe.name == self.function: - self.usdt = probe - return - self._bail("unrecognized USDT probe %s" % self.function) + pass def _substitute_exprs(self): def repl(expr): @@ -279,12 +263,18 @@ def _generate_hash_field(self, i): return "%s v%d;\n" % (self.expr_types[i], i) def _generate_field_assignment(self, i): + text = "" + if self.probe_type == "u" and self.exprs[i][0:3] == "arg": + text = (" u64 %s;\n" + + " bpf_usdt_readarg(%s, ctx, &%s);\n") % \ + (self.exprs[i], self.exprs[i][3], self.exprs[i]) if self._is_string(self.expr_types[i]): - return (" bpf_probe_read(&__key.v%d.s," + + return (text + " bpf_probe_read(&__key.v%d.s," + " sizeof(__key.v%d.s), (void *)%s);\n") % \ (i, i, self.exprs[i]) else: - return " __key.v%d = %s;\n" % (i, self.exprs[i]) + return text + " __key.v%d = %s;\n" % \ + (i, self.exprs[i]) def _generate_hash_decl(self): if self.type == "hist": @@ -331,7 +321,7 @@ def generate_text(self): probe_text = """ DATA_DECL -QUALIFIER int PROBENAME(struct pt_regs *ctx SIGNATURE) +int PROBENAME(struct pt_regs *ctx SIGNATURE) { PID_FILTER PREFIX @@ -342,7 +332,6 @@ def generate_text(self): } """ prefix = "" - qualifier = "" signature = "" # If any entry arguments are probed in a ret probe, we need @@ -357,10 +346,6 @@ def generate_text(self): if self.probe_type == "t": program += self.tp.generate_struct() prefix += self.tp.generate_get_struct() - elif self.probe_type == "u": - qualifier = "static inline" - signature = ", int __loc_id" - prefix += self.usdt.generate_usdt_cases() elif self.probe_type == "p" and len(self.signature) > 0: # Only entry uprobes/kprobes can have user-specified # signatures. Other probes force it to (). @@ -380,12 +365,6 @@ def generate_text(self): "1" if len(self.filter) == 0 else self.filter) program = program.replace("COLLECT", collect) program = program.replace("PREFIX", prefix) - program = program.replace("QUALIFIER", qualifier) - - if self.probe_type == "u": - self.usdt_thunk_names = [] - program += self.usdt.generate_usdt_thunks( - self.probe_func_name, self.usdt_thunk_names) return program @@ -396,13 +375,7 @@ def _attach_u(self): if libpath is None or len(libpath) == 0: self._bail("unable to find library %s" % self.library) - if self.probe_type == "u": - for i, location in enumerate(self.usdt.locations): - self.bpf.attach_uprobe(name=libpath, - addr=location.address, - fn_name=self.usdt_thunk_names[i], - pid=self.pid or -1) - elif self.probe_type == "r": + if self.probe_type == "r": self.bpf.attach_uretprobe(name=libpath, sym=self.function, fn_name=self.probe_func_name, @@ -423,6 +396,7 @@ def _attach_k(self): def attach(self, bpf): self.bpf = bpf + if self.probe_type == "u": return; if self.is_user: self._attach_u() else: @@ -604,19 +578,23 @@ def __init__(self): metavar="header", help="additional header files to include in the BPF program") self.args = parser.parse_args() + self.usdt_ctx = None def _create_probes(self): self.probes = [] for specifier in (self.args.countspecifier or []): - self.probes.append(Probe( - "freq", specifier, self.args.pid)) + self.probes.append(Probe(self, "freq", specifier)) for histspecifier in (self.args.histspecifier or []): - self.probes.append( - Probe("hist", histspecifier, self.args.pid)) + self.probes.append(Probe(self, "hist", histspecifier)) if len(self.probes) == 0: print("at least one specifier is required") exit() + def enable_usdt_probe(self, probe_name, fn_name): + if not self.usdt_ctx: + self.usdt_ctx = USDT(pid=self.args.pid) + self.usdt_ctx.enable_probe(probe_name, fn_name) + def _generate_program(self): bpf_source = """ struct __string_t { char s[%d]; }; @@ -632,8 +610,9 @@ def _generate_program(self): for probe in self.probes: bpf_source += probe.generate_text() if self.args.verbose: + if self.usdt_ctx: print(self.usdt_ctx.get_text()) print(bpf_source) - self.bpf = BPF(text=bpf_source) + self.bpf = BPF(text=bpf_source, usdt=self.usdt_ctx) def _attach(self): Tracepoint.attach(self.bpf) diff --git a/tools/trace.py b/tools/trace.py index 0fc192c33ce4..8fff394cd765 100755 --- a/tools/trace.py +++ b/tools/trace.py @@ -9,7 +9,7 @@ # Licensed under the Apache License, Version 2.0 (the "License") # Copyright (C) 2016 Sasha Goldshtein. -from bcc import BPF, Tracepoint, Perf, ProcUtils, USDTReader +from bcc import BPF, Tracepoint, Perf, ProcUtils, USDT from time import sleep, strftime import argparse import re