Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New tool: memleak #355

Merged
merged 13 commits into from
Feb 8, 2016
Prev Previous commit
Next Next commit
Added -c switch to run a command and trace that process
  • Loading branch information
goldshtn committed Feb 8, 2016
commit 751fce5af9efe20b23859a159b4473dbf24a0d71
52 changes: 31 additions & 21 deletions tools/memleak.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import os

class Time(object):
# BPF timestamps come from the monotonic clock. To be able to filter
# and compare them from Python, we need to invoke clock_gettime from librt.
# Adapted from http:https://stackoverflow.com/a/1205762
CLOCK_MONOTONIC_RAW = 4 # see <linux/time.h>

Expand All @@ -23,9 +25,6 @@ class timespec(ctypes.Structure):

@staticmethod
def monotonic_time():
"""monotonic_time()
Returns the reading of the monotonic clock, in nanoseconds.
"""
t = Time.timespec()
if Time.clock_gettime(Time.CLOCK_MONOTONIC_RAW , ctypes.pointer(t)) != 0:
errno_ = ctypes.get_errno()
Expand All @@ -34,18 +33,12 @@ def monotonic_time():

class StackDecoder(object):
def __init__(self, pid, bpf):
"""
TODO
"""
self.pid = pid
self.bpf = bpf
self.ranges_cache = {}
self.refresh_code_ranges()

def refresh_code_ranges(self):
"""
TODO
"""
if self.pid == -1:
return
self.code_ranges = self._get_code_ranges()
Expand All @@ -54,6 +47,11 @@ def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
for raw_range in raw_ranges:
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp 00000000 fd:00 1442606 /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a binary (.so
# or the main executable). The first two lines are the range of
# that memory segment, which we index by binary name.
parts = raw_range.split()
if len(parts) < 6 or parts[5][0] == '[' or not 'x' in parts[1]:
continue
Expand All @@ -67,8 +65,12 @@ def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = run_command("objdump -t %s" % binary)
raw_symbols = run_command_get_output("objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions (F) in the .text segment. The first
# number is the start address, and the second number is the length.
parts = raw_symbol.split()
if len(parts) < 6 or parts[3] != ".text" or parts[2] != "F":
continue
Expand All @@ -81,38 +83,45 @@ def _get_sym_ranges(self, binary):

def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset. There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset

def _decode_addr(self, addr):
code_ranges = self._get_code_ranges()
# Find the binary that contains the specified address. For .so files, look
# at the relative address; for the main executable, look at the absolute
# address.
for binary, (start, end) in code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary, offset), binary)
return "%x" % addr

def decode_stack(self, info):
"""
TODO
"""
def decode_stack(self, info, is_kernel_trace):
stack = ""
if info.num_frames <= 0:
return "???"
for i in range(0, info.num_frames):
addr = info.callstack[i]
if kernel_trace:
if is_kernel_trace:
stack += " %s [kernel] (%x) ;" % (self.bpf.ksym(addr), addr)
else:
# At some point, we hope to have native BPF user-mode symbol
# decoding, but for now we have to use our own
stack += " %s (%x) ;" % (self._decode_addr(addr), addr)
return stack

def run_command(command):
def run_command_get_output(command):
p = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return iter(p.stdout.readline, b'')

def run_command_get_pid(command):
p = subprocess.Popen(command.split())
return p.pid

examples = """
EXAMPLES:

Expand Down Expand Up @@ -166,7 +175,8 @@ def run_command(command):
min_age_ns = 1e6*int(args.older)

if not command is None:
pass # TODO Run command, get its pid and trace that
print("Executing '%s' and tracing the resulting process." % command)
pid = run_command_get_pid(command)

bpf_source = open("memleak.c").read()
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
Expand All @@ -190,19 +200,19 @@ def print_outstanding():
stacks = {}
print("*** Outstanding allocations:")
allocs = bpf_program.get_table("allocs")
for address, info in sorted(allocs.items(), key=lambda a: -a[1].size):
for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
if Time.monotonic_time() - min_age_ns < info.timestamp_ns:
continue
stack = decoder.decode_stack(info)
stack = decoder.decode_stack(info, kernel_trace)
if stack in stacks: stacks[stack] = (stacks[stack][0] + 1, stacks[stack][1] + info.size)
else: stacks[stack] = (1, info.size)
if args.show_allocs:
print("\taddr = %x size = %s" % (address.value, info.size))
for stack, (count, size) in sorted(stacks.items(), key=lambda s: -s[1][1]):
for stack, (count, size) in sorted(stacks.items(), key=lambda s: s[1][1]):
print("\t%d bytes in %d allocations from stack\n\t\t%s" % (size, count, stack.replace(";", "\n\t\t")))

while True:
if trace_all:
if trace_all:
print bpf_program.trace_fields()
else:
try:
Expand Down