Skip to content

Commit

Permalink
Bytes/string encoding (iovisor#2004)
Browse files Browse the repository at this point in the history
* tools: use 'replace' error handler by default in decode()

Tools might encounter characters from a non-UTF-8 charset (e.g. in a
file name). When this happens, it's better to replace the unexpected
character with a question mark than to crash the tool when all we do
is print the string.

* tools: fix a bytes/string issue in attach_perf_event()
  • Loading branch information
jeromemarchand authored and yonghong-song committed Oct 9, 2018
1 parent ec3fe90 commit b96ebcd
Show file tree
Hide file tree
Showing 39 changed files with 100 additions and 84 deletions.
2 changes: 1 addition & 1 deletion tools/bashreadline.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class Data(ct.Structure):
def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
print("%-9s %-6d %s" % (strftime("%H:%M:%S"), event.pid,
event.str.decode()))
event.str.decode('utf-8', 'replace')))

b["events"].open_perf_buffer(print_event)
while 1:
Expand Down
4 changes: 2 additions & 2 deletions tools/biosnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ def print_event(cpu, data, size):
delta = float(delta) + (event.ts - prev_ts)

print("%-14.9f %-14.14s %-6s %-7s %-2s %-9s %-7s %7.2f" % (
delta / 1000000, event.name.decode(), event.pid,
event.disk_name.decode(), rwflg, val,
delta / 1000000, event.name.decode('utf-8', 'replace'), event.pid,
event.disk_name.decode('utf-8', 'replace'), rwflg, val,
event.len, float(event.delta) / 1000000))

prev_ts = event.ts
Expand Down
4 changes: 2 additions & 2 deletions tools/biotop.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,8 @@ def signal_ignore(signal, frame):
# print line
avg_ms = (float(v.us) / 1000) / v.io
print("%-6d %-16s %1s %-3d %-3d %-8s %5s %7s %6.2f" % (k.pid,
k.name.decode(), "W" if k.rwflag else "R", k.major, k.minor,
diskname, v.io, v.bytes / 1024, avg_ms))
k.name.decode('utf-8', 'replace'), "W" if k.rwflag else "R",
k.major, k.minor, diskname, v.io, v.bytes / 1024, avg_ms))

line += 1
if line >= maxrows:
Expand Down
10 changes: 6 additions & 4 deletions tools/btrfsslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,14 @@ def print_event(cpu, data, size):

if (csv):
print("%d,%s,%d,%s,%d,%d,%d,%s" % (
event.ts_us, event.task.decode(), event.pid, type, event.size,
event.offset, event.delta_us, event.file.decode()))
event.ts_us, event.task.decode('utf-8', 'replace'), event.pid,
type, event.size, event.offset, event.delta_us,
event.file.decode('utf-8', 'replace')))
return
print("%-8s %-14.14s %-6s %1s %-7s %-8d %7.2f %s" % (strftime("%H:%M:%S"),
event.task.decode(), event.pid, type, event.size, event.offset / 1024,
float(event.delta_us) / 1000, event.file.decode()))
event.task.decode('utf-8', 'replace'), event.pid, type, event.size,
event.offset / 1024, float(event.delta_us) / 1000,
event.file.decode('utf-8', 'replace')))

# initialize BPF
b = BPF(text=bpf_text)
Expand Down
2 changes: 1 addition & 1 deletion tools/cachetop.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def get_processes_stats(
counts = bpf.get_table("counts")
stats = defaultdict(lambda: defaultdict(int))
for k, v in counts.items():
stats["%d-%d-%s" % (k.pid, k.uid, k.comm.decode())][k.ip] = v.value
stats["%d-%d-%s" % (k.pid, k.uid, k.comm.decode('utf-8', 'replace'))][k.ip] = v.value
stats_list = []

for pid, count in sorted(stats.items(), key=lambda stat: stat[0]):
Expand Down
4 changes: 2 additions & 2 deletions tools/capable.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ def print_event(cpu, data, size):
else:
name = "?"
print("%-9s %-6d %-6d %-16s %-4d %-20s %d" % (strftime("%H:%M:%S"),
event.uid, event.pid, event.comm.decode(), event.cap, name,
event.audit))
event.uid, event.pid, event.comm.decode('utf-8', 'replace'),
event.cap, name, event.audit))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
Expand Down
5 changes: 3 additions & 2 deletions tools/dcsnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,9 @@ class Data(ct.Structure):
def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
print("%-11.6f %-6d %-16s %1s %s" % (
time.time() - start_ts, event.pid, event.comm.decode(),
mode_s[event.type], event.filename.decode()))
time.time() - start_ts, event.pid,
event.comm.decode('utf-8', 'replace'), mode_s[event.type],
event.filename.decode('utf-8', 'replace')))

# header
print("%-11s %-6s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE"))
Expand Down
10 changes: 6 additions & 4 deletions tools/ext4slower.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,14 @@ def print_event(cpu, data, size):

if (csv):
print("%d,%s,%d,%s,%d,%d,%d,%s" % (
event.ts_us, event.task.decode(), event.pid, type, event.size,
event.offset, event.delta_us, event.file.decode()))
event.ts_us, event.task.decode('utf-8', 'replace'), event.pid,
type, event.size, event.offset, event.delta_us,
event.file.decode('utf-8', 'replace')))
return
print("%-8s %-14.14s %-6s %1s %-7s %-8d %7.2f %s" % (strftime("%H:%M:%S"),
event.task.decode(), event.pid, type, event.size, event.offset / 1024,
float(event.delta_us) / 1000, event.file.decode()))
event.task.decode('utf-8', 'replace'), event.pid, type, event.size,
event.offset / 1024, float(event.delta_us) / 1000,
event.file.decode('utf-8', 'replace')))

# initialize BPF
b = BPF(text=bpf_text)
Expand Down
3 changes: 2 additions & 1 deletion tools/filelife.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ class Data(ct.Structure):
def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
print("%-8s %-6d %-16s %-7.2f %s" % (strftime("%H:%M:%S"), event.pid,
event.comm.decode(), float(event.delta) / 1000, event.fname.decode()))
event.comm.decode('utf-8', 'replace'), float(event.delta) / 1000,
event.fname.decode('utf-8', 'replace')))

b["events"].open_perf_buffer(print_event)
while 1:
Expand Down
6 changes: 3 additions & 3 deletions tools/fileslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,13 @@ def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents

ms = float(event.delta_us) / 1000
name = event.name.decode()
name = event.name.decode('utf-8', 'replace')
if event.name_len > DNAME_INLINE_LEN:
name = name[:-3] + "..."

print("%-8.3f %-14.14s %-6s %1s %-7s %7.2f %s" % (
time.time() - start_ts, event.comm.decode(), event.pid,
mode_s[event.mode], event.sz, ms, name))
time.time() - start_ts, event.comm.decode('utf-8', 'replace'),
event.pid, mode_s[event.mode], event.sz, ms, name))

b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
Expand Down
7 changes: 4 additions & 3 deletions tools/filetop.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,15 @@ def signal_ignore(signal, frame):
for k, v in reversed(sorted(counts.items(),
key=lambda counts:
getattr(counts[1], args.sort))):
name = k.name.decode()
name = k.name.decode('utf-8', 'replace')
if k.name_len > DNAME_INLINE_LEN:
name = name[:-3] + "..."

# print line
print("%-6d %-16s %-6d %-6d %-7d %-7d %1s %s" % (k.pid,
k.comm.decode(), v.reads, v.writes, v.rbytes / 1024,
v.wbytes / 1024, k.type.decode(), name))
k.comm.decode('utf-8', 'replace'), v.reads, v.writes,
v.rbytes / 1024, v.wbytes / 1024,
k.type.decode('utf-8', 'replace'), name))

line += 1
if line >= maxrows:
Expand Down
4 changes: 2 additions & 2 deletions tools/funcslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def print_stack(event):
# print folded stack output
user_stack = list(user_stack)
kernel_stack = list(kernel_stack)
line = [event.comm.decode()] + \
line = [event.comm.decode('utf-8', 'replace')] + \
[b.sym(addr, event.tgid_pid) for addr in reversed(user_stack)] + \
(do_delimiter and ["-"] or []) + \
[b.ksym(addr) for addr in reversed(kernel_stack)]
Expand All @@ -323,7 +323,7 @@ def print_event(cpu, data, size):
ts = float(event.duration_ns) / time_multiplier
if not args.folded:
print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") %
(event.comm.decode(), event.tgid_pid >> 32,
(event.comm.decode('utf-8', 'replace'), event.tgid_pid >> 32,
ts, event.retval, args.functions[event.id], args_str(event)))
if args.user_stack or args.kernel_stack:
print_stack(event)
Expand Down
4 changes: 2 additions & 2 deletions tools/gethostlatency.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ class Data(ct.Structure):
def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
print("%-9s %-6d %-16s %10.2f %s" % (strftime("%H:%M:%S"), event.pid,
event.comm.decode(), (float(event.delta) / 1000000),
event.host.decode()))
event.comm.decode('utf-8', 'replace'), (float(event.delta) / 1000000),
event.host.decode('utf-8', 'replace')))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
Expand Down
2 changes: 1 addition & 1 deletion tools/hardirqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
else:
print("%-26s %11s" % ("HARDIRQ", "TOTAL_" + label))
for k, v in sorted(dist.items(), key=lambda dist: dist[1].value):
print("%-26s %11d" % (k.name.decode(), v.value / factor))
print("%-26s %11d" % (k.name.decode('utf-8', 'replace'), v.value / factor))
dist.clear()

countdown -= 1
Expand Down
2 changes: 1 addition & 1 deletion tools/llcstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
# This happens on some PIDs due to missed counts caused by sampling
hit = (v.value - miss) if (v.value >= miss) else 0
print('{:<8d} {:<16s} {:<4d} {:>12d} {:>12d} {:>6.2f}%'.format(
k.pid, k.name.decode(), k.cpu, v.value, miss,
k.pid, k.name.decode('utf-8', 'replace'), k.cpu, v.value, miss,
(float(hit) / float(v.value)) * 100.0))
print('Total References: {} Total Misses: {} Hit Rate: {:.2f}%'.format(
tot_ref, tot_miss, (float(tot_ref - tot_miss) / float(tot_ref)) * 100.0))
3 changes: 2 additions & 1 deletion tools/mdflush.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ class Data(ct.Structure):
def print_event(cpu, data, size):
event = ct.cast(data, ct.POINTER(Data)).contents
print("%-8s %-6d %-16s %s" % (strftime("%H:%M:%S"), event.pid,
event.comm.decode(), event.disk.decode()))
event.comm.decode('utf-8', 'replace'),
event.disk.decode('utf-8', 'replace')))

# read events
b["events"].open_perf_buffer(print_event)
Expand Down
4 changes: 2 additions & 2 deletions tools/mountsnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,8 @@ def print_event(mounts, umounts, cpu, data, size):
flags=decode_umount_flags(syscall['flags']),
retval=decode_errno(event.union.retval))
print('{:16} {:<7} {:<7} {:<11} {}'.format(
syscall['comm'].decode(), syscall['tgid'], syscall['pid'],
syscall['mnt_ns'], call))
syscall['comm'].decode('utf-8', 'replace'), syscall['tgid'],
syscall['pid'], syscall['mnt_ns'], call))
except KeyError:
# This might happen if we lost an event.
pass
Expand Down
4 changes: 2 additions & 2 deletions tools/nfsslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,13 @@ def print_event(cpu, data, size):
return
print("%-8s %-14.14s %-6s %1s %-7s %-8d %7.2f %s" %
(strftime("%H:%M:%S"),
event.task.decode(),
event.task.decode('utf-8', 'replace'),
event.pid,
type,
event.size,
event.offset / 1024,
float(event.delta_us) / 1000,
event.file.decode()))
event.file.decode('utf-8', 'replace')))


# Currently specifically works for NFSv4, the other kprobes are generic
Expand Down
4 changes: 2 additions & 2 deletions tools/offcputime.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def signal_ignore(signal, frame):
# print folded stack output
user_stack = list(user_stack)
kernel_stack = list(kernel_stack)
line = [k.name.decode()]
line = [k.name.decode('utf-8', 'replace')]
# if we failed to get the stack is, such as due to no space (-ENOMEM) or
# hash collision (-EEXIST), we still print a placeholder for consistency
if not args.kernel_stacks_only:
Expand Down Expand Up @@ -312,7 +312,7 @@ def signal_ignore(signal, frame):
else:
for addr in user_stack:
print(" %s" % b.sym(addr, k.tgid))
print(" %-16s %s (%d)" % ("-", k.name.decode(), k.pid))
print(" %-16s %s (%d)" % ("-", k.name.decode('utf-8', 'replace'), k.pid))
print(" %d\n" % v.value)

if missing_stacks > 0:
Expand Down
8 changes: 4 additions & 4 deletions tools/offwaketime.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def signal_ignore(signal, frame):

if folded:
# print folded stack output
line = [k.target.decode()]
line = [k.target.decode('utf-8', 'replace')]
if not args.kernel_stacks_only:
if stack_id_err(k.t_u_stack_id):
line.append("[Missed User Stack]")
Expand Down Expand Up @@ -344,11 +344,11 @@ def signal_ignore(signal, frame):
else:
line.extend([b.sym(addr, k.w_tgid)
for addr in reversed(list(waker_user_stack))])
line.append(k.waker.decode())
line.append(k.waker.decode('utf-8', 'replace'))
print("%s %d" % (";".join(line), v.value))
else:
# print wakeup name then stack in reverse order
print(" %-16s %s %s" % ("waker:", k.waker.decode(), k.t_pid))
print(" %-16s %s %s" % ("waker:", k.waker.decode('utf-8', 'replace'), k.t_pid))
if not args.kernel_stacks_only:
if stack_id_err(k.w_u_stack_id):
print(" [Missed User Stack]")
Expand Down Expand Up @@ -381,7 +381,7 @@ def signal_ignore(signal, frame):
else:
for addr in target_user_stack:
print(" %s" % b.sym(addr, k.t_tgid))
print(" %-16s %s %s" % ("target:", k.target.decode(), k.w_pid))
print(" %-16s %s %s" % ("target:", k.target.decode('utf-8', 'replace'), k.w_pid))
print(" %d\n" % v.value)

if missing_stacks > 0:
Expand Down
2 changes: 1 addition & 1 deletion tools/old/offcputime.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def signal_ignore(signal, frame):
for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
if folded:
# print folded stack output
line = k.name.decode() + ";"
line = k.name.decode('utf-8', 'replace') + ";"
for i in reversed(range(0, maxdepth)):
if k.ret[i] == 0:
continue
Expand Down
4 changes: 2 additions & 2 deletions tools/old/oomkill.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def print_event(cpu, data, size):
avgline = stats.read().rstrip()
print(("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
", %d pages, loadavg: %s") % (strftime("%H:%M:%S"), event.fpid,
event.fcomm.decode(), event.tpid, event.tcomm.decode(), event.pages,
avgline))
event.fcomm.decode('utf-8', 'replace'), event.tpid,
event.tcomm.decode('utf-8', 'replace'), event.pages, avgline))

# initialize BPF
b = BPF(text=bpf_text)
Expand Down
2 changes: 1 addition & 1 deletion tools/old/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def aksym(addr):
# print folded stack output
user_stack = list(user_stack)
kernel_stack = list(kernel_stack)
line = [k.name.decode()] + \
line = [k.name.decode('utf-8', 'replace')] + \
[b.sym(addr, k.pid) for addr in reversed(user_stack)] + \
(do_delimiter and ["-"] or []) + \
[aksym(addr) for addr in reversed(kernel_stack)]
Expand Down
8 changes: 4 additions & 4 deletions tools/old/wakeuptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,23 +199,23 @@ def signal_ignore(signal, frame):
for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
if folded:
# print folded stack output
line = k.waker.decode() + ";"
line = k.waker.decode('utf-8', 'replace') + ";"
for i in reversed(range(0, maxdepth)):
if k.ret[i] == 0:
continue
line = line + b.ksym(k.ret[i])
if i != 0:
line = line + ";"
print("%s;%s %d" % (line, k.target.decode(), v.value))
print("%s;%s %d" % (line, k.target.decode('utf-8', 'replace'), v.value))
else:
# print default multi-line stack output
print(" %-16s %s" % ("target:", k.target.decode()))
print(" %-16s %s" % ("target:", k.target.decode('utf-8', 'replace')))
for i in range(0, maxdepth):
if k.ret[i] == 0:
break
print(" %-16x %s" % (k.ret[i],
b.ksym(k.ret[i])))
print(" %-16s %s" % ("waker:", k.waker.decode()))
print(" %-16s %s" % ("waker:", k.waker.decode('utf-8', 'replace')))
print(" %d\n" % v.value)
counts.clear()

Expand Down
4 changes: 2 additions & 2 deletions tools/oomkill.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def print_event(cpu, data, size):
avgline = stats.read().rstrip()
print(("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
", %d pages, loadavg: %s") % (strftime("%H:%M:%S"), event.fpid,
event.fcomm.decode(), event.tpid, event.tcomm.decode(), event.pages,
avgline))
event.fcomm.decode('utf-8', 'replace'), event.tpid,
event.tcomm.decode('utf-8', 'replace'), event.pages, avgline))

# initialize BPF
b = BPF(text=bpf_text)
Expand Down
3 changes: 2 additions & 1 deletion tools/opensnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ def print_event(cpu, data, size):

print("%-6d %-16s %4d %3d %s" %
(event.id & 0xffffffff if args.tid else event.id >> 32,
event.comm.decode(), fd_s, err, event.fname.decode()))
event.comm.decode('utf-8', 'replace'), fd_s, err,
event.fname.decode('utf-8', 'replace')))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=64)
Expand Down
8 changes: 4 additions & 4 deletions tools/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def signal_ignore(signal, frame):

def aksym(addr):
if args.annotations:
return b.ksym(addr) + "_[k]"
return b.ksym(addr) + "_[k]".encode()
else:
return b.ksym(addr)

Expand Down Expand Up @@ -320,7 +320,7 @@ def aksym(addr):
line.append("[Missed Kernel Stack]")
else:
line.extend([b.ksym(addr) for addr in reversed(kernel_stack)])
print("%s %d" % (b";".join(line).decode(), v.value))
print("%s %d" % (b";".join(line).decode('utf-8', 'replace'), v.value))
else:
# print default multi-line stack output
if not args.user_stacks_only:
Expand All @@ -336,8 +336,8 @@ def aksym(addr):
print(" [Missed User Stack]")
else:
for addr in user_stack:
print(" %s" % b.sym(addr, k.pid).decode())
print(" %-16s %s (%d)" % ("-", k.name.decode(), k.pid))
print(" %s" % b.sym(addr, k.pid).decode('utf-8', 'replace'))
print(" %-16s %s (%d)" % ("-", k.name.decode('utf-8', 'replace'), k.pid))
print(" %d\n" % v.value)

# check missing
Expand Down
5 changes: 3 additions & 2 deletions tools/sslsniff.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,9 @@ def print_event(cpu, data, size, rw):
" bytes lost) " + "-" * 5

fmt = "%-12s %-18.9f %-16s %-6d %-6d\n%s\n%s\n%s\n\n"
print(fmt % (rw, time_s, event.comm.decode(), event.pid, event.len, s_mark,
event.v0.decode(), e_mark))
print(fmt % (rw, time_s, event.comm.decode('utf-8', 'replace'),
event.pid, event.len, s_mark,
event.v0.decode('utf-8', 'replace'), e_mark))

b["perf_SSL_write"].open_perf_buffer(print_event_write)
b["perf_SSL_read"].open_perf_buffer(print_event_read)
Expand Down
Loading

0 comments on commit b96ebcd

Please sign in to comment.