Skip to content

Commit

Permalink
Moved user symbol decoding from memleak into bcc module
Browse files Browse the repository at this point in the history
  • Loading branch information
goldshtn committed Mar 21, 2016
1 parent 3bc2828 commit 0e856f4
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 95 deletions.
120 changes: 119 additions & 1 deletion src/python/bcc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import multiprocessing
import os
import re
from subprocess import Popen, PIPE
from subprocess import Popen, PIPE, STDOUT
import struct
import sys
basestring = (unicode if sys.version_info[0] < 3 else str)
Expand Down Expand Up @@ -747,6 +747,124 @@ def ksymname(name):
return 0
return ksyms[idx][1]

class ProcessSymbols(object):
def __init__(self, pid):
"""
Initializes the process symbols store for the specified pid.
Call refresh_code_ranges() periodically if you anticipate changes
in the set of loaded libraries or their addresses.
"""
self.pid = pid
self.ranges_cache = {}
self.refresh_code_ranges()

def refresh_code_ranges(self):
self.code_ranges = self._get_code_ranges()

@staticmethod
def _is_binary_segment(parts):
return len(parts) == 6 and parts[5][0] != '[' and 'x' in parts[1]

def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a .so file
# or the main executable. The first two lines are the range of
# that memory segment, which we index by binary name.
for raw_range in raw_ranges:
parts = raw_range.split()
if not BPF.ProcessSymbols._is_binary_segment(parts):
continue
binary = parts[5]
range_parts = parts[0].split('-')
addr_range = (int(range_parts[0], 16), int(range_parts[1], 16))
ranges[binary] = addr_range
return ranges

@staticmethod
def _is_function_symbol(parts):
return len(parts) == 6 and parts[3] == ".text" and parts[2] == "F"

@staticmethod
def _run_command_get_output(command):
p = Popen(command.split(), stdout=PIPE, stderr=STDOUT)
return iter(p.stdout.readline, b'')

def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = BPF.ProcessSymbols._run_command_get_output(
"objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions in the .text segment.
# The first number is the start address, and the second
# number is the length.
parts = raw_symbol.split()
if not BPF.ProcessSymbols._is_function_symbol(parts):
continue
sym_start = int(parts[0], 16)
sym_len = int(parts[4], 16)
sym_name = parts[5]
sym_ranges[sym_name] = (sym_start, sym_len)
self.ranges_cache[binary] = sym_ranges
return sym_ranges

def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset.
# There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset

def decode_addr(self, addr):
"""
Given an address, return the best symbolic representation of it.
If it doesn't fall in any module, return its hex string. If it
falls within a module but we don't have a symbol for it, return
the hex string and the module. If we do have a symbol for it,
return the symbol and the module, e.g. "readline+0x10 [bash]".
"""
code_ranges = self._get_code_ranges()
# Find the binary that contains the specified address.
# For .so files, look at the relative address; for the main
# executable, look at the absolute address.
for binary, (start, end) in code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start \
if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary, offset),
binary)
return "%x" % addr

@classmethod
def usymaddr(cls, pid, addr, refresh_symbols=False):
"""usymaddr(pid, addr, refresh_symbols=False)
Decode the specified address in the specified process to a symbolic
representation that includes the symbol name, offset within the symbol,
and the module name. See the ProcessSymbols class for more details.
Specify refresh_symbols=True if you suspect the set of loaded modules
or their load addresses has changed since the last time you called
usymaddr() on this pid.
"""
proc_sym = None
if pid in cls._process_symbols:
proc_sym = cls._process_symbols[pid]
if refresh_symbols:
proc_sym.refresh_code_ranges()
else:
proc_sym = ProcessSymbols(pid)
cls._process_symbols[pid] = proc_sym
return proc_sym.decode_addr(addr)

@staticmethod
def num_open_kprobes():
"""num_open_kprobes()
Expand Down
111 changes: 17 additions & 94 deletions tools/memleak.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/usr/bin/env python
#
# memleak Trace and display outstanding allocations to detect
# memory leaks in user-mode processes and the kernel.
# memleak Trace and display outstanding allocations to detect
# memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
# [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
# [-Z MAX_SIZE]
# [interval] [count]
# [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
# [-Z MAX_SIZE]
# [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.
Expand Down Expand Up @@ -45,88 +45,14 @@ def monotonic_time():
return t.tv_sec * 1e9 + t.tv_nsec

class StackDecoder(object):
def __init__(self, pid, bpf):
def __init__(self, pid):
self.pid = pid
self.bpf = bpf
self.ranges_cache = {}
self.refresh_code_ranges()
if pid != -1:
self.proc_sym = BPF.ProcessSymbols(pid)

def refresh_code_ranges(self):
if self.pid == -1:
return
self.code_ranges = self._get_code_ranges()

@staticmethod
def _is_binary_segment(parts):
return len(parts) == 6 and \
parts[5][0] != '[' and 'x' in parts[1]

def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a .so file
# or the main executable. The first two lines are the range of
# that memory segment, which we index by binary name.
for raw_range in raw_ranges:
parts = raw_range.split()
if not StackDecoder._is_binary_segment(parts):
continue
binary = parts[5]
range_parts = parts[0].split('-')
addr_range = (int(range_parts[0], 16),
int(range_parts[1], 16))
ranges[binary] = addr_range
return ranges

@staticmethod
def _is_function_symbol(parts):
return len(parts) == 6 and parts[3] == ".text" \
and parts[2] == "F"

def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = run_command_get_output("objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions in the .text segment.
# The first number is the start address, and the second
# number is the length.
parts = raw_symbol.split()
if not StackDecoder._is_function_symbol(parts):
continue
sym_start = int(parts[0], 16)
sym_len = int(parts[4], 16)
sym_name = parts[5]
sym_ranges[sym_name] = (sym_start, sym_len)
self.ranges_cache[binary] = sym_ranges
return sym_ranges

def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset.
# There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset

def _decode_addr(self, addr):
code_ranges = self._get_code_ranges()
# Find the binary that contains the specified address.
# For .so files, look at the relative address; for the main
# executable, look at the absolute address.
for binary, (start, end) in code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start \
if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary,
offset), binary)
return "%x" % addr
def refresh(self):
if self.pid != -1:
self.proc_sym.refresh_code_ranges()

def decode_stack(self, info, is_kernel_trace):
stack = ""
Expand All @@ -136,13 +62,10 @@ def decode_stack(self, info, is_kernel_trace):
addr = info.callstack[i]
if is_kernel_trace:
stack += " %s [kernel] (%x) ;" % \
(self.bpf.ksym(addr), addr)
(BPF.ksym(addr), addr)
else:
# At some point, we hope to have native BPF
# user-mode symbol decoding, but for now we
# have to use our own.
stack += " %s (%x) ;" % \
(self._decode_addr(addr), addr)
(self.proc_sym.decode_addr(addr), addr)
return stack

def run_command_get_output(command):
Expand Down Expand Up @@ -302,7 +225,7 @@ def run_command_get_pid(command):
info.timestamp_ns = bpf_ktime_get_ns();
info.num_frames = grab_stack(ctx, &info) - 2;
allocs.update(&address, &info);
if (SHOULD_PRINT) {
bpf_trace_printk("alloc exited, size = %lu, result = %lx, frames = %d\\n",
info.size, address, info.num_frames);
Expand All @@ -325,7 +248,7 @@ def run_command_get_pid(command):
}
return 0;
}
"""
"""
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("GRAB_ONE_FRAME", max_stack_size *
Expand Down Expand Up @@ -358,7 +281,7 @@ def run_command_get_pid(command):
bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")

decoder = StackDecoder(pid, bpf_program)
decoder = StackDecoder(pid)

def print_outstanding():
stacks = {}
Expand Down Expand Up @@ -391,7 +314,7 @@ def print_outstanding():
sleep(interval)
except KeyboardInterrupt:
exit()
decoder.refresh_code_ranges()
decoder.refresh()
print_outstanding()
count_so_far += 1
if num_prints is not None and count_so_far >= num_prints:
Expand Down

0 comments on commit 0e856f4

Please sign in to comment.