Skip to content

Commit

Permalink
cc: Resolve symbols from external debuginfo
Browse files Browse the repository at this point in the history
Adds support for resolving symbols using external debuginfo files,
which can be retrieved from two locations. First, check the build-id
of the desired binary and look in /usr/lib/debug/.build-id according
to the build-id structure. Second, check the debuglink section of
the desired binary and look in /usr/lib/debug or in the binary's
current directory. These are the rules applied by GDB as well, but
GDB lets the user reconfigure the debug directory path from
/usr/lib/debug to something else; we do not support this.

These changes are based on the following description of how GDB
resolves external debuginfo:

https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
  • Loading branch information
goldshtn committed Feb 21, 2017
1 parent 7d8c29c commit 0155385
Show file tree
Hide file tree
Showing 11 changed files with 263 additions and 30 deletions.
4 changes: 2 additions & 2 deletions docs/reference_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1107,9 +1107,9 @@ Examples in situ:

### 3. sym()

Syntax: ```BPF.sym(addr, pid, show_module=False, show_address=True)```
Syntax: ```BPF.sym(addr, pid, show_module=False, show_offset=False)```

Translate a memory address into a function name for a pid, which is returned. A pid of less than zero will access the kernel symbol cache. The `show_module` and `show_address` parameters control whether the module in which the symbol lies should be displayed, and whether the instruction offset from the beginning of the symbol should be displayed. These extra parameters default to `False`.
Translate a memory address into a function name for a pid, which is returned. A pid of less than zero will access the kernel symbol cache. The `show_module` and `show_offset` parameters control whether the module in which the symbol lies should be displayed, and whether the instruction offset from the beginning of the symbol should be displayed. These extra parameters default to `False`.

Example:

Expand Down
2 changes: 1 addition & 1 deletion examples/tracing/mallocstacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@
for k, v in reversed(sorted(calls.items(), key=lambda c: c[1].value)):
print("%d bytes allocated at:" % v.value)
for addr in stack_traces.walk(k.value):
print("\t%s" % b.sym(addr, pid, show_address=True))
print("\t%s" % b.sym(addr, pid, show_offset=True))
247 changes: 240 additions & 7 deletions src/cc/bcc_elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#include <gelf.h>
#include "bcc_elf.h"
Expand Down Expand Up @@ -196,20 +201,248 @@ static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload) {
return 0;
}

int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
void *payload) {
// Walks the sections of 'e' looking for one whose name equals 'section_name'.
// Returns whatever elf_getdata(section, NULL) yields for the first match, or
// NULL when the section-name string table index cannot be obtained or no
// section matches.
static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) {
  size_t shstrndx;
  Elf_Scn *scn;
  GElf_Shdr shdr;

  if (elf_getshdrstrndx(e, &shstrndx) != 0)
    return NULL;

  for (scn = elf_nextscn(e, NULL); scn != NULL; scn = elf_nextscn(e, scn)) {
    const char *scn_name;

    // A section whose header cannot be read is simply skipped.
    if (gelf_getshdr(scn, &shdr) == NULL)
      continue;

    scn_name = elf_strptr(e, shstrndx, shdr.sh_name);
    if (scn_name == NULL)
      continue;

    if (strcmp(scn_name, section_name) == 0)
      return elf_getdata(scn, NULL);
  }

  return NULL;
}

// Reads the .gnu_debuglink section of 'e'.
//
// On success returns 1, points *debug_file at the debuginfo file name stored
// at the start of the section data, and stores the section's last four bytes
// in *crc (the checksum, per the .gnu_debuglink layout described in the GDB
// "Separate Debug Files" documentation). *debug_file aliases the buffer
// returned by libelf, so the caller must not free it.
//
// Returns 0, with *debug_file == NULL and *crc == 0, when the section is
// missing, has no data, is too short, or does not contain a NUL-terminated
// name ahead of the checksum.
static int find_debuglink(Elf *e, char **debug_file, unsigned int *crc) {
  Elf_Data *data = NULL;
  const char *bytes;
  size_t name_area;

  *debug_file = NULL;
  *crc = 0;

  data = get_section_elf_data(e, ".gnu_debuglink");
  // Check d_buf before touching it; at least one name byte, its terminator
  // and the four checksum bytes are required.
  if (!data || !data->d_buf || data->d_size <= 5)
    return 0;

  bytes = (const char *)data->d_buf;
  name_area = data->d_size - 4;

  // The name is later handed to printf-style "%s"; refuse a section whose
  // name would run into (or past) the checksum bytes.
  if (!memchr(bytes, '\0', name_area))
    return 0;

  // Copy the checksum out byte-wise instead of dereferencing a cast pointer,
  // which would assume suitable alignment of the section buffer.
  memcpy(crc, bytes + name_area, sizeof(*crc));
  *debug_file = (char *)data->d_buf;

  return 1;
}

// Renders the GNU build-id of 'e' as a lowercase hex string into 'buildid'.
//
// The section data is expected to carry the owner name "GNU" at byte offset
// 12 and the build-id bytes from offset 16 to the end of the section
// (presumably a 12-byte note header followed by the 4-byte name; confirm
// against the ELF note layout if this is ever generalized).
//
// 'buildid' must have room for at least 128 bytes, which is what
// find_debug_via_buildid() supplies; a build-id too long to fit is rejected
// rather than written past the end of the buffer.
//
// Returns 1 on success, 0 when the section is missing, malformed or too long.
static int find_buildid(Elf *e, char *buildid) {
  enum { BUILDID_BUF_SIZE = 128 };

  Elf_Data *data = get_section_elf_data(e, ".note.gnu.build-id");
  // Binaries without a build-id note are common; this must not crash.
  if (!data || !data->d_buf || data->d_size <= 16)
    return 0;
  if (memcmp((const char *)data->d_buf + 12, "GNU", 4) != 0)
    return 0;

  const unsigned char *bytes = (const unsigned char *)data->d_buf + 16;
  size_t length = data->d_size - 16;

  // Two hex digits per byte plus the terminating NUL.
  if (length > (BUILDID_BUF_SIZE - 1) / 2)
    return 0;

  for (size_t i = 0; i < length; ++i) {
    snprintf(buildid + (i * 2), 3, "%02x", bytes[i]);
  }

  return 1;
}

// Checksum routine used for GNU debuglink verification. Adapted from:
// https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
//
// Folds the 'len' bytes at 'buf' into the running checksum 'crc' and returns
// the updated value. Pass 0 as 'crc' for the first (or only) chunk.
static unsigned int gnu_debuglink_crc32(unsigned int crc,
                                        char *buf, size_t len) {
  static const unsigned int crc32_table[256] =
  {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
  };
  size_t pos;

  // The running value is kept bit-inverted while bytes are folded in.
  crc = ~crc & 0xffffffff;
  for (pos = 0; pos < len; ++pos) {
    // Only the low eight bits of the XOR select the table slot.
    unsigned int slot = (crc ^ buf[pos]) & 0xff;
    crc = crc32_table[slot] ^ (crc >> 8);
  }
  crc = ~crc & 0xffffffff;
  return crc;
}

// Checks whether the contents of 'file' hash to 'crc' under
// gnu_debuglink_crc32().
//
// Returns 1 when the checksum matches. Returns 0 on mismatch and on any
// failure: NULL path, file cannot be opened or stat'ed, file is empty, or the
// file cannot be mapped (for example because it is a directory).
static int verify_checksum(const char *file, unsigned int crc) {
  struct stat st;
  int fd;
  void *buf;
  unsigned int actual;

  // Callers may hand over "no candidate found" as NULL.
  if (!file)
    return 0;

  fd = open(file, O_RDONLY);
  if (fd < 0)
    return 0;

  // A zero-length mapping is invalid, so an empty file is treated as a
  // failure here instead of being passed to mmap.
  if (fstat(fd, &st) < 0 || st.st_size <= 0) {
    close(fd);
    return 0;
  }

  buf = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  // mmap signals failure with MAP_FAILED, not with a NULL pointer.
  if (buf == MAP_FAILED) {
    close(fd);
    return 0;
  }

  actual = gnu_debuglink_crc32(0, buf, (size_t)st.st_size);

  munmap(buf, (size_t)st.st_size);
  close(fd);
  return actual == crc;
}

// Locates the separate debuginfo file named by the .gnu_debuglink section of
// 'e', where 'binpath' is the path of the binary that 'e' was opened from.
//
// Candidates are tried in this order, D being the directory part of
// 'binpath' and NAME the file name from the section:
//   1. D/NAME
//   2. D/.debug/NAME
//   3. /usr/lib/debug + D + /NAME
// A candidate is accepted only if it exists and its checksum matches the one
// recorded in the section; otherwise the next location is tried.
//
// Returns a heap-allocated path that the caller must free(), or NULL when
// there is no usable debuglink, no candidate matches, or memory runs out.
static char *find_debug_via_debuglink(Elf *e, const char *binpath) {
  char fullpath[PATH_MAX];
  char *bindir_copy = NULL;  // the allocation; this is what gets freed
  const char *bindir = NULL; // dirname() result; may not equal bindir_copy
  char *res = NULL;
  unsigned int crc;
  char *name;  // the name of the debuginfo file
  int n;

  if (!find_debuglink(e, &name, &crc))
    return NULL;

  bindir_copy = strdup(binpath);
  if (!bindir_copy)
    return NULL;
  // dirname() may modify its argument and may return a pointer to static
  // storage (e.g. "." for a path without a slash), so its result must never
  // be passed to free().
  bindir = dirname(bindir_copy);

  // Search for the file in 'binpath'
  n = snprintf(fullpath, sizeof(fullpath), "%s/%s", bindir, name);
  if (n > 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1 && verify_checksum(fullpath, crc)) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in 'binpath'/.debug
  n = snprintf(fullpath, sizeof(fullpath), "%s/.debug/%s", bindir, name);
  if (n > 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1 && verify_checksum(fullpath, crc)) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in the global debug directory /usr/lib/debug/'binpath'
  n = snprintf(fullpath, sizeof(fullpath), "/usr/lib/debug%s/%s", bindir, name);
  if (n > 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1 && verify_checksum(fullpath, crc)) {
    res = strdup(fullpath);
    goto DONE;
  }

DONE:
  free(bindir_copy);
  return res;
}

// Locates a separate debuginfo file through the build-id of 'e'.
//
// The path probed is /usr/lib/debug/.build-id/mm/nnnn...nnnn.debug, where
// "mm" is the first two characters of the hex build-id and "nnnn...nnnn" is
// the remainder.
//
// Returns a strdup()'ed path when that file exists, NULL when the build-id
// cannot be obtained or the file is absent.
static char *find_debug_via_buildid(Elf *e) {
  char buildid[128];  // currently 40 seems to be default, let's be safe
  char candidate[PATH_MAX];

  if (find_buildid(e, buildid) == 0)
    return NULL;

  sprintf(candidate, "/usr/lib/debug/.build-id/%c%c/%s.debug",
          buildid[0], buildid[1], &buildid[2]);

  if (access(candidate, F_OK) == -1)
    return NULL;

  return strdup(candidate);
}

// Invokes 'callback' (with 'payload') for the symbols of the ELF file at
// 'path' via listsymbols(), and returns listsymbols()'s result, or -1 when
// the file cannot be opened with openelf().
//
// When 'is_debug_file' is zero, a separate debuginfo file is looked up first
// (by build-id, then by debuglink -- the same order GDB follows, see
// https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html) and its
// symbols are reported before those of 'path' itself. The result of that
// nested pass is not propagated.
static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
                            void *payload, int is_debug_file) {
  Elf *elf;
  int fd;
  int status;

  if (openelf(path, &elf, &fd) < 0)
    return -1;

  if (!is_debug_file) {
    char *debug_path = find_debug_via_buildid(elf);

    if (debug_path == NULL)
      debug_path = find_debug_via_debuglink(elf, path);

    if (debug_path != NULL) {
      // Passing 1 stops the nested call from searching for the debuginfo
      // file of a debuginfo file, and so on without end.
      foreach_sym_core(debug_path, callback, payload, 1);
      free(debug_path);
    }
  }

  status = listsymbols(elf, callback, payload);
  elf_end(elf);
  close(fd);
  return status;
}

// Public entry point: reports the symbols of the ELF file at 'path' to
// 'callback', including those found in a separate debuginfo file when one
// can be located. Returns the result of foreach_sym_core().
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
                        void *payload) {
  // 'path' is the binary itself, not a debuginfo file, so the debuginfo
  // lookup is enabled.
  const int is_debug_file = 0;

  return foreach_sym_core(path, callback, payload, is_debug_file);
}

static int loadaddr(Elf *e, uint64_t *addr) {
size_t phnum, i;

Expand Down Expand Up @@ -268,11 +501,11 @@ int bcc_elf_is_shared_obj(const char *path) {

int main(int argc, char *argv[])
{
uint64_t addr;
if (bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr) < 0)
return -1;
uint64_t addr;
if (bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr) < 0)
return -1;

printf("%s: %p\n", argv[2], (void *)addr);
return 0;
printf("%s: %p\n", argv[2], (void *)addr);
return 0;
}
#endif
20 changes: 10 additions & 10 deletions src/python/bcc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,42 +980,42 @@ def _sym_cache(pid):
return BPF._sym_caches[pid]

@staticmethod
def sym(addr, pid, show_module=False, show_address=False):
"""sym(addr, pid, show_module=False, show_address=False)
def sym(addr, pid, show_module=False, show_offset=False):
"""sym(addr, pid, show_module=False, show_offset=False)
Translate a memory address into a function name for a pid, which is
returned. When show_module is True, the module name is also included.
When show_address is True, the instruction offset as a hexadecimal
When show_offset is True, the instruction offset as a hexadecimal
number is also included in the string.
A pid of less than zero will access the kernel symbol cache.
Example output when both show_module and show_address are True:
Example output when both show_module and show_offset are True:
"start_thread+0x202 [libpthread-2.24.so]"
Example output when both show_module and show_address are False:
Example output when both show_module and show_offset are False:
"start_thread"
"""
name, offset, module = BPF._sym_cache(pid).resolve(addr)
offset = "+0x%x" % offset if show_address and name is not None else ""
offset = "+0x%x" % offset if show_offset and name is not None else ""
name = name or "[unknown]"
name = name + offset
module = " [%s]" % os.path.basename(module) if show_module else ""
return name + module

@staticmethod
def ksym(addr, show_module=False, show_address=False):
def ksym(addr, show_module=False, show_offset=False):
"""ksym(addr)
Translate a kernel memory address into a kernel function name, which is
returned. When show_module is True, the module name ("kernel") is also
included. When show_address is true, the instruction offset as a
included. When show_offset is true, the instruction offset as a
hexadecimal number is also included in the string.
Example output when both show_module and show_address are True:
Example output when both show_module and show_offset are True:
"default_idle+0x0 [kernel]"
"""
return BPF.sym(addr, -1, show_module, show_address)
return BPF.sym(addr, -1, show_module, show_offset)

@staticmethod
def ksymname(name):
Expand Down
2 changes: 1 addition & 1 deletion tools/memleak.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def print_outstanding():
combined = []
for addr in stack:
combined.append(bpf_program.sym(addr, pid,
show_module=True, show_address=True))
show_module=True, show_offset=True))
alloc_info[info.stack_id] = Allocation(combined,
info.size)
if args.show_allocs:
Expand Down
4 changes: 2 additions & 2 deletions tools/old/memleak.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

from bcc import BPF, SymbolCache
from bcc import BPF
from time import sleep
from datetime import datetime
import argparse
Expand All @@ -24,7 +24,7 @@ def decode_stack(bpf, pid, info):
return "???"
for i in range(0, info.num_frames):
addr = info.callstack[i]
stack += " %s ;" % bpf.sym(addr, pid, show_address=True)
stack += " %s ;" % bpf.sym(addr, pid, show_offset=True)
return stack

def run_command_get_output(command):
Expand Down
2 changes: 1 addition & 1 deletion tools/old/stackcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def print_frame(addr):
print(" ", end="")
if verbose:
print("%-16x " % addr, end="")
print(b.ksym(addr, show_address=offset))
print(b.ksym(addr, show_offset=offset))

# output
exiting = 0 if args.interval else 1
Expand Down
2 changes: 1 addition & 1 deletion tools/old/stacksnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
(task, pid, cpu, flags, ts, msg) = b.trace_fields()
if msg != "":
(reg, addr) = msg.split(" ")
ip = b.ksym(int(addr, 16), show_address=offset)
ip = b.ksym(int(addr, 16), show_offset=offset)
msg = msg + " " + ip
if verbose:
print("%-18.9f %-12.12s %-6d %-3d %s" % (ts, task, pid, cpu, msg))
Expand Down
Loading

0 comments on commit 0155385

Please sign in to comment.