Skip to content

Commit

Permalink
Lazily symbolize regular ELF binaries
Browse files Browse the repository at this point in the history
This patch adds a new API to bcc_elf.h, bcc_elf_foreach_sym_lazy. This
helper avoids storing symbol names in string format, as for large
binaries this data can get quite large.

Instead we store the location in the ELF binary where we can later find
the string. Later on, we can load these strings on demand and cache them
in case they're accessed again.

This patch also makes lazy resolution the default for regular ELF
binaries, where regular means not perfmap or VDSO.
  • Loading branch information
danobi authored and yonghong-song committed Jul 12, 2019
1 parent 01ee0b6 commit d42fcc8
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 10 deletions.
59 changes: 51 additions & 8 deletions src/cc/bcc_elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ static Elf_Scn * get_section(Elf *e, const char *section_name,

static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
struct bcc_symbol_option *option,
bcc_elf_symcb callback, void *payload) {
bcc_elf_symcb callback, bcc_elf_symcb_lazy callback_lazy,
void *payload) {
Elf_Data *data = NULL;

#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
Expand All @@ -237,6 +238,7 @@ static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
for (i = 0; i < symcount; ++i) {
GElf_Sym sym;
const char *name;
size_t name_len;

if (!gelf_getsym(data, (int)i, &sym))
continue;
Expand All @@ -245,6 +247,7 @@ static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
continue;
if (name[0] == 0)
continue;
name_len = strlen(name);

if (sym.st_value == 0)
continue;
Expand Down Expand Up @@ -287,15 +290,22 @@ static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
#endif
#endif

if (callback(name, sym.st_value, sym.st_size, payload) < 0)
int ret;
if (callback_lazy)
ret = callback_lazy(stridx, sym.st_name, name_len, sym.st_value,
sym.st_size, payload);
else
ret = callback(name, sym.st_value, sym.st_size, payload);
if (ret < 0)
return 1; // signal termination to caller
}
}

return 0;
}

static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload,
static int listsymbols(Elf *e, bcc_elf_symcb callback,
bcc_elf_symcb_lazy callback_lazy, void *payload,
struct bcc_symbol_option *option) {
Elf_Scn *section = NULL;

Expand All @@ -309,7 +319,7 @@ static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload,
continue;

int rc = list_in_scn(e, section, header.sh_link, header.sh_entsize,
option, callback, payload);
option, callback, callback_lazy, payload);
if (rc == 1)
break; // callback signaled termination

Expand Down Expand Up @@ -537,6 +547,7 @@ static char *find_debug_via_buildid(Elf *e) {
}

static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
bcc_elf_symcb_lazy callback_lazy,
struct bcc_symbol_option *option, void *payload,
int is_debug_file) {
Elf *e;
Expand All @@ -561,12 +572,12 @@ static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
debug_file = find_debug_via_debuglink(e, path,
option->check_debug_file_crc);
if (debug_file) {
foreach_sym_core(debug_file, callback, option, payload, 1);
foreach_sym_core(debug_file, callback, callback_lazy, option, payload, 1);
free(debug_file);
}
}

res = listsymbols(e, callback, payload, option);
res = listsymbols(e, callback, callback_lazy, payload, option);
elf_end(e);
close(fd);
return res;
Expand All @@ -575,7 +586,13 @@ static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
void *option, void *payload) {
return foreach_sym_core(
path, callback, (struct bcc_symbol_option*)option, payload, 0);
path, callback, NULL, (struct bcc_symbol_option*)option, payload, 0);
}

// Lazy counterpart of bcc_elf_foreach_sym: iterates the symbols of the ELF
// file at `path`, handing each one to `callback` as a string table location
// and length rather than as a name string.
// `option` is interpreted as a struct bcc_symbol_option pointer; `payload` is
// forwarded untouched to the callback.
int bcc_elf_foreach_sym_lazy(const char *path, bcc_elf_symcb_lazy callback,
                             void *option, void *payload) {
  struct bcc_symbol_option *sym_option = (struct bcc_symbol_option *)option;

  // No name-based callback is supplied, and the file is not flagged as a
  // debug file (is_debug_file == 0).
  return foreach_sym_core(path, NULL, callback, sym_option, payload, 0);
}

int bcc_elf_get_text_scn_info(const char *path, uint64_t *addr,
Expand Down Expand Up @@ -738,7 +755,7 @@ int bcc_elf_foreach_vdso_sym(bcc_elf_symcb callback, void *payload) {
if (openelf_fd(vdso_image_fd, &elf) == -1)
return -1;

return listsymbols(elf, callback, payload, &default_option);
return listsymbols(elf, callback, NULL, payload, &default_option);
}

// return value: 0 : success
Expand Down Expand Up @@ -911,6 +928,32 @@ int bcc_elf_get_buildid(const char *path, char *buildid)
return 0;
}

// Copy the string located at offset `str_table_idx` of ELF section
// `section_idx` in the file at `path` into `out`.
//
// `out` must point to a buffer of at least `len` bytes. At most `len - 1`
// characters are copied and the result is always NUL-terminated; a longer
// string is truncated to fit.
//
// Returns 0 on success, and -1 if `out` is NULL, `len` is 0, openelf() fails
// for `path`, or elf_strptr() cannot find the string.
int bcc_elf_symbol_str(const char *path, size_t section_idx,
                       size_t str_table_idx, char *out, size_t len)
{
  Elf *e;
  int fd, err = 0;
  const char *name;

  if (!out || !len)
    return -1;

  if (openelf(path, &e, &fd) < 0)
    return -1;

  name = elf_strptr(e, section_idx, str_table_idx);
  if (name == NULL) {
    err = -1;
    goto exit;
  }

  // strncpy() does not terminate the destination when the source holds `len`
  // or more characters, so copy at most len - 1 bytes and terminate by hand.
  strncpy(out, name, len - 1);
  out[len - 1] = '\0';

exit:
  elf_end(e);
  close(fd);
  return err;
}

#if 0
#include <stdio.h>

Expand Down
9 changes: 9 additions & 0 deletions src/cc/bcc_elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ typedef void (*bcc_elf_probecb)(const char *, const struct bcc_elf_usdt *,
// Symbol name, start address, length, payload
// Callback returning a negative value indicates to stop the iteration
typedef int (*bcc_elf_symcb)(const char *, uint64_t, uint64_t, void *);
// Section idx, str table idx, str length, start address, length, payload
// Callback returning a negative value indicates to stop the iteration
typedef int (*bcc_elf_symcb_lazy)(size_t, size_t, size_t, uint64_t, uint64_t,
void *);
// Segment virtual address, memory size, file offset, payload
// Callback returning a negative value indicates to stop the iteration
typedef int (*bcc_elf_load_sectioncb)(uint64_t, uint64_t, uint64_t, void *);
Expand All @@ -57,6 +60,10 @@ int bcc_elf_foreach_load_section(const char *path,
// Returns -1 on error, and 0 on success or stopped by callback
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback, void *option,
void *payload);
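// Similar to bcc_elf_foreach_sym, but instead of the symbol name the callback
// receives the name's location (section index and string table offset) and
// its length, so the string can be looked up later on demand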
int bcc_elf_foreach_sym_lazy(const char *path, bcc_elf_symcb_lazy callback,
void *option, void *payload);
// Iterate over all symbols from current system's vDSO
// Returns -1 on error, and 0 on success or stopped by callback
int bcc_elf_foreach_vdso_sym(bcc_elf_symcb callback, void *payload);
Expand All @@ -70,6 +77,8 @@ int bcc_elf_is_exe(const char *path);
int bcc_elf_is_vdso(const char *name);
int bcc_free_memory();
int bcc_elf_get_buildid(const char *path, char *buildid);
int bcc_elf_symbol_str(const char *path, size_t section_idx,
size_t str_table_idx, char *out, size_t len);

#ifdef __cplusplus
}
Expand Down
20 changes: 19 additions & 1 deletion src/cc/bcc_syms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,14 @@ int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
return 0;
}

// Lazy symbol callback: appends a Symbol to the module's syms_ that records
// where its name lives (section index, string table index, length) together
// with its start address and size. `p` is the Module being populated.
// Always returns 0, so the iteration is never stopped from here.
int ProcSyms::Module::_add_symbol_lazy(size_t section_idx, size_t str_table_idx,
                                       size_t str_len, uint64_t start,
                                       uint64_t size, void *p) {
  auto *module = static_cast<Module *>(p);
  module->syms_.emplace_back(section_idx, str_table_idx, str_len, start, size);
  return 0;
}

void ProcSyms::Module::load_sym_table() {
if (loaded_)
return;
Expand All @@ -286,7 +294,7 @@ void ProcSyms::Module::load_sym_table() {
if (type_ == ModuleType::PERF_MAP)
bcc_perf_map_foreach_sym(path_.c_str(), _add_symbol, this);
if (type_ == ModuleType::EXEC || type_ == ModuleType::SO)
bcc_elf_foreach_sym(path_.c_str(), _add_symbol, symbol_option_, this);
bcc_elf_foreach_sym_lazy(path_.c_str(), _add_symbol_lazy, symbol_option_, this);
if (type_ == ModuleType::VDSO)
bcc_elf_foreach_vdso_sym(_add_symbol, this);

Expand Down Expand Up @@ -359,6 +367,16 @@ bool ProcSyms::Module::find_addr(uint64_t offset, struct bcc_symbol *sym) {
uint64_t limit = it->start;
for (; offset >= it->start; --it) {
if (offset < it->start + it->size) {
// Resolve and cache the symbol name if necessary
if (!it->name) {
std::string sym_name(it->name_idx.str_len + 1, '\0');
if (bcc_elf_symbol_str(name_.c_str(), it->name_idx.section_idx,
it->name_idx.str_table_idx, &sym_name[0], sym_name.size()))
break;

it->name = &*(symnames_.emplace(std::move(sym_name)).first);
}

sym->name = it->name->c_str();
sym->offset = (offset - it->start);
return true;
Expand Down
19 changes: 18 additions & 1 deletion src/cc/syms.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,24 @@ class KSyms : SymbolCache {
};

class ProcSyms : SymbolCache {
// Location of a symbol's name inside the ELF file, stored in place of the
// name string so the string can be fetched on demand.
struct NameIdx {
// Index of the ELF section holding the string table for the symbol.
size_t section_idx;
// Index of the name within that string table (the symbol's st_name).
size_t str_table_idx;
// Length of the name as reported by strlen(), i.e. without the trailing NUL.
size_t str_len;
};

struct Symbol {
Symbol(const std::string *name, uint64_t start, uint64_t size)
: name(name), start(start), size(size) {}
const std::string *name;
Symbol(size_t section_idx, size_t str_table_idx, size_t str_len, uint64_t start,
uint64_t size)
: start(start), size(size) {
name_idx.section_idx = section_idx;
name_idx.str_table_idx = str_table_idx;
name_idx.str_len = str_len;
}
struct NameIdx name_idx;
const std::string *name{nullptr};
uint64_t start;
uint64_t size;

Expand Down Expand Up @@ -124,6 +138,9 @@ class ProcSyms : SymbolCache {

static int _add_symbol(const char *symname, uint64_t start, uint64_t size,
void *p);
static int _add_symbol_lazy(size_t section_idx, size_t str_table_idx,
size_t str_len, uint64_t start, uint64_t size,
void *p);
};

int pid_;
Expand Down

0 comments on commit d42fcc8

Please sign in to comment.