Decode bytes when formatting them as strings

USDTProbe objects (and USDTProbeArguments and USDTProbeLocations) are instantiated with data that's sourced from libbcc calls. That means that their attributes are bytes-typed, not string-typed. When a bytes-typed value is rendered into a string with Python's '%s' formatting directive, it gets wrapped in single quotes and prefixed with "b". For example, b'probe-location'. This is visually noisy, but it also breaks some tool behavior that uses string-formatted values for stuff like filters. This is only an issue in Python 3. In Python 2, the bytes type is just an alias for the string type, and so byte sequences from libbcc were implicitly treated as ASCII text.
yingtaojuzi · Jun 16, 2021 · dc1bceb · dc1bceb
1 parent 88ca330
commit dc1bceb
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 10 deletions.
diff --git a/src/python/bcc/usdt.py b/src/python/bcc/usdt.py
@@ -50,24 +50,24 @@ def _format(self):
  if self.valid & BCC_USDT_ARGUMENT_FLAGS.CONSTANT != 0:
  return "%d" % self.constant
  if self.valid & BCC_USDT_ARGUMENT_FLAGS.DEREF_OFFSET == 0:
- return "%s" % self.base_register_name
+ return "%s" % self.base_register_name.decode()
  if self.valid & BCC_USDT_ARGUMENT_FLAGS.DEREF_OFFSET != 0 and \
  self.valid & BCC_USDT_ARGUMENT_FLAGS.DEREF_IDENT == 0:
  if self.valid & BCC_USDT_ARGUMENT_FLAGS.INDEX_REGISTER_NAME != 0:
- index_offset = " + %s" % self.index_register_name
+ index_offset = " + %s" % self.index_register_name.decode()
  if self.valid & BCC_USDT_ARGUMENT_FLAGS.SCALE != 0:
  index_offset += " * %d" % self.scale
  else:
  index_offset = ""
  sign = '+' if self.deref_offset >= 0 else '-'
- return "*(%s %s %d%s)" % (self.base_register_name,
- sign, abs(self.deref_offset), index_offset)
+ return "*(%s %s %d%s)" % (self.base_register_name.decode(),
+  sign, abs(self.deref_offset), index_offset)
  if self.valid & BCC_USDT_ARGUMENT_FLAGS.DEREF_OFFSET != 0 and \
  self.valid & BCC_USDT_ARGUMENT_FLAGS.DEREF_IDENT != 0 and \
  self.valid & BCC_USDT_ARGUMENT_FLAGS.BASE_REGISTER_NAME != 0 and \
  self.base_register_name == "ip":
  sign = '+' if self.deref_offset >= 0 else '-'
- return "*(&%s %s %d)" % (self.deref_ident,
+ return "*(&%s %s %d)" % (self.deref_ident.decode(),
  sign, abs(self.deref_offset))
  # If we got here, this is an unrecognized case. Doesn't mean it's
  # necessarily bad, so just provide the raw data. It just means that
@@ -86,7 +86,7 @@ def __init__(self, probe, index, location):
  self.bin_path = location.bin_path
 
  def __str__(self):
- return "%s 0x%x" % (self.bin_path, self.address)
+ return "%s 0x%x" % (self.bin_path.decode(), self.address)
 
  def get_argument(self, index):
  arg = bcc_usdt_argument()
@@ -111,10 +111,10 @@ def __init__(self, context, probe):
 
  def __str__(self):
  return "%s:%s [sema 0x%x]" % \
- (self.provider, self.name, self.semaphore)
+ (self.provider.decode(), self.name.decode(), self.semaphore)
 
  def short_name(self):
- return "%s:%s" % (self.provider, self.name)
+ return "%s:%s" % (self.provider.decode(), self.name.decode())
 
  def get_location(self, index):
  loc = bcc_usdt_location()

diff --git a/tools/tplist.py b/tools/tplist.py
@@ -80,8 +80,8 @@ def print_usdt_details(probe):
  print(" %d location(s)" % probe.num_locations)
  print(" %d argument(s)" % probe.num_arguments)
  else:
- print("%s %s:%s" %
- (probe.bin_path, probe.provider, probe.name))
+ print("%s %s" %
+ (probe.bin_path.decode(), probe.short_name()))
 
 def print_usdt(pid, lib):
  reader = USDT(path=lib, pid=pid)