Skip to content

Commit

Permalink
u* tools: automatically detect the language (iovisor#1067)
Browse files Browse the repository at this point in the history
* cc: bcc function to detect the language of a process

bcc_procutils_language looks into /proc/$pid/cmdline, /proc/$pid/exe,
and /proc/$pid/maps to determine the language.
Python wrapper takes a list of candidate languages; if the detected
language is not part of the list, None is returned.

* u* tools: automatically detect the language

Uses the detect_language bcc helper. The -l switch can override the
detected language. In uthreads and ucalls, the language can be
overridden to 'none' to trace pthreads and syscalls respectively.

All tools use the -l switch to set the language, for consistency.
  • Loading branch information
pchaigno authored and goldshtn committed Mar 30, 2017
1 parent 00f662d commit 4bb6d7f
Show file tree
Hide file tree
Showing 17 changed files with 219 additions and 87 deletions.
56 changes: 56 additions & 0 deletions src/cc/bcc_proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,59 @@ bool bcc_procutils_exit_mountns(struct ns_cookie *nc) {

return rc;
}

/*
 * Languages recognised by bcc_procutils_language(), in match-priority order.
 * C is handled separately: it is only reported when a libc mapping is found
 * and none of the languages below matched.
 */
const char *languages[] = {"java", "python", "ruby", "php", "node"};
const char *language_c = "c";
const int nb_languages = 5;

/*
 * Guess the implementation language of process `pid`.
 *
 * Two sources are inspected, in this order:
 *   1. the resolved target of /proc/<pid>/exe: the first entry of
 *      `languages` that occurs as a substring of the path wins;
 *   2. the mapped file names in /proc/<pid>/maps: the first mapping whose
 *      name contains "/lib<language>" wins.
 *
 * Returns a pointer to a static string (one of `languages`, or `language_c`
 * when only a libc mapping was seen). Returns NULL when the maps file cannot
 * be opened or when no clue was found. The returned pointer must not be
 * freed by the caller.
 */
const char *bcc_procutils_language(int pid) {
  /* "/proc/" (6) + up to 11 chars for a signed 32-bit pid + "/maps" (5)
   * + NUL = 23 bytes, so 24 is always enough. */
  char procfilename[24], line[4096], pathname[32], *str;
  FILE *procfile;
  bool libc = false;
  int i, ret;

  /* Look for clues in the absolute path to the executable. */
  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/exe", (long)pid);
  if (realpath(procfilename, line)) {
    for (i = 0; i < nb_languages; i++)
      if (strstr(line, languages[i]))
        return languages[i];
  }

  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/maps", (long)pid);
  procfile = fopen(procfilename, "r");
  if (!procfile)
    return NULL;

  /* Look for clues in memory mappings. */
  do {
    char perm[8], dev[16];
    unsigned long long begin, end, offset;
    long long inode;

    /* Parse the fixed columns; field widths keep a malformed or unusually
     * long column from overrunning the local buffers. Only the number of
     * converted fields is used afterwards. */
    ret = fscanf(procfile, "%llx-%llx %7s %llx %15s %lld", &begin, &end, perm,
                 &offset, dev, &inode);
    /* The rest of the line is the (optional) mapped path name. */
    if (!fgets(line, sizeof(line), procfile))
      break;
    if (ret != 6)
      continue;

    char *mapname = line;
    char *newline = strchr(line, '\n');
    if (newline)
      newline[0] = '\0';
    /* Cast: passing a negative char to isspace() is undefined behaviour. */
    while (isspace((unsigned char)mapname[0]))
      mapname++;

    /* Remember libc mappings ("libc-2.x.so", "libc.so.6"); this does not
     * depend on the candidate language, so check it once per mapping. */
    if ((str = strstr(mapname, "libc")) &&
        (str[4] == '-' || str[4] == '.'))
      libc = true;

    for (i = 0; i < nb_languages; i++) {
      snprintf(pathname, sizeof(pathname), "/lib%s", languages[i]);
      if (strstr(mapname, pathname)) {
        /* Close the maps file before the early return to avoid leaking
         * the stream on every successful detection. */
        fclose(procfile);
        return languages[i];
      }
    }
  } while (ret && ret != EOF);

  fclose(procfile);

  /* Return C as the language if libc was found and nothing else. */
  return libc ? language_c : NULL;
}
1 change: 1 addition & 0 deletions src/cc/bcc_proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ int bcc_procutils_each_ksym(bcc_procutils_ksymcb callback, void *payload);
void bcc_procutils_free(const char *ptr);
bool bcc_procutils_enter_mountns(int pid, struct ns_cookie *nc);
bool bcc_procutils_exit_mountns(struct ns_cookie *nc);
const char *bcc_procutils_language(int pid);

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions src/python/bcc/libbcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ class bcc_symbol(ct.Structure):
lib.bcc_procutils_which_so.argtypes = [ct.c_char_p, ct.c_int]
lib.bcc_procutils_free.restype = None
lib.bcc_procutils_free.argtypes = [ct.c_void_p]
lib.bcc_procutils_language.restype = ct.POINTER(ct.c_char)
lib.bcc_procutils_language.argtypes = [ct.c_int]

lib.bcc_resolve_symname.restype = ct.c_int
lib.bcc_resolve_symname.argtypes = [
Expand Down
8 changes: 8 additions & 0 deletions src/python/bcc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ctypes as ct

from .libbcc import lib

def _read_cpu_range(path):
cpus = []
Expand All @@ -31,3 +34,8 @@ def get_online_cpus():

def get_possible_cpus():
return _read_cpu_range('/sys/devices/system/cpu/possible')

def detect_language(candidates, pid):
    """Return the detected language of process ``pid`` if it is a candidate.

    Calls the ``bcc_procutils_language`` helper from libbcc and decodes the
    C string it returns.

    Args:
        candidates: container of language names (``str``) the caller accepts.
        pid: id of the process to inspect.

    Returns:
        The detected language name when it is in ``candidates``; ``None``
        when the helper detects nothing (NULL result) or when the detected
        language is not one of the candidates.
    """
    res = lib.bcc_procutils_language(pid)
    # A NULL pointer casts to a c_char_p whose .value is None; calling
    # .decode() on it would raise AttributeError, so handle it explicitly.
    raw = ct.cast(res, ct.c_char_p).value
    if raw is None:
        return None
    language = raw.decode()
    return language if language in candidates else None
6 changes: 6 additions & 0 deletions tests/cc/test_c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ using namespace std;

static pid_t spawn_child(void *, bool, bool, int (*)(void *));

// The test binary itself is a native executable, so the detector is expected
// to report "c" for the current process.
TEST_CASE("language detection", "[c_api]") {
  const char *detected = bcc_procutils_language(getpid());
  REQUIRE(detected);
  const string expected("c");
  REQUIRE(expected.compare(detected) == 0);
}

TEST_CASE("shared object resolution", "[c_api]") {
char *libm = bcc_procutils_which_so("m", 0);
REQUIRE(libm);
Expand Down
6 changes: 3 additions & 3 deletions tests/python/test_tools_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,14 +311,14 @@ def test_ttysnoop(self):
def test_ucalls(self):
# This attaches a large number (300+) kprobes, which can be slow,
# so use an increased timeout value.
self.run_with_int("ucalls.py -S %d" % os.getpid(),
self.run_with_int("ucalls.py -l none -S %d" % os.getpid(),
timeout=30, kill_timeout=30)

@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_uflow(self):
# The Python installed on the Ubuntu buildbot doesn't have USDT
# probes, so we can't run uflow.
# self.run_with_int("uflow.py python %d" % os.getpid())
# self.run_with_int("uflow.py -l python %d" % os.getpid())
pass

@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
Expand All @@ -329,7 +329,7 @@ def test_ugc(self):

@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_uobjnew(self):
self.run_with_int("uobjnew.py c %d" % os.getpid())
self.run_with_int("uobjnew.py -l c %d" % os.getpid())

@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_ustat(self):
Expand Down
7 changes: 6 additions & 1 deletion tests/python/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# Copyright (c) Catalysts GmbH
# Licensed under the Apache License, Version 2.0 (the "License")

from bcc.utils import get_online_cpus
from bcc.utils import get_online_cpus, detect_language
import multiprocessing
import unittest
import os

class TestUtils(unittest.TestCase):
def test_get_online_cpus(self):
Expand All @@ -13,6 +14,10 @@ def test_get_online_cpus(self):

self.assertEqual(len(online_cpus), num_cores)

def test_detect_language(self):
    # The test runs under a Python interpreter, so inspecting our own pid
    # must yield "python" when it is among the accepted candidates.
    own_pid = os.getpid()
    accepted = ["java", "ruby", "php", "node", "c", "python"]
    detected = detect_language(accepted, own_pid)
    self.assertEqual(detected, "python")

if __name__ == "__main__":
unittest.main()
30 changes: 19 additions & 11 deletions tools/ucalls.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@

from __future__ import print_function
import argparse
from bcc import BPF, USDT
from bcc import BPF, USDT, utils
from time import sleep
import os

languages = ["java", "python", "ruby", "php"]

examples = """examples:
./ucalls -l java 185 # trace Java calls and print statistics on ^C
Expand All @@ -34,8 +37,7 @@
parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("interval", type=int, nargs='?',
help="print every specified number of seconds")
parser.add_argument("-l", "--language",
choices=["java", "python", "ruby", "php"],
parser.add_argument("-l", "--language", choices=languages + ["none"],
help="language to trace (if none, trace syscalls only)")
parser.add_argument("-T", "--top", type=int,
help="number of most frequent/slow calls to print")
Expand All @@ -49,38 +51,44 @@
help="report times in milliseconds (default is microseconds)")
args = parser.parse_args()

language = args.language
if not language:
language = utils.detect_language(languages, args.pid)

# We assume that the entry and return probes have the same arguments. This is
# the case for Java, Python, Ruby, and PHP. If there's a language where it's
# not the case, we will need to build a custom correlator from entry to exit.
if args.language == "java":
if language == "java":
# TODO for JVM entries, we actually have the real length of the class
# and method strings in arg3 and arg5 respectively, so we can insert
# the null terminator in its proper position.
entry_probe = "method__entry"
return_probe = "method__return"
read_class = "bpf_usdt_readarg(2, ctx, &clazz);"
read_method = "bpf_usdt_readarg(4, ctx, &method);"
elif args.language == "python":
elif language == "python":
entry_probe = "function__entry"
return_probe = "function__return"
read_class = "bpf_usdt_readarg(1, ctx, &clazz);" # filename really
read_method = "bpf_usdt_readarg(2, ctx, &method);"
elif args.language == "ruby":
elif language == "ruby":
# TODO Also probe cmethod__entry and cmethod__return with same arguments
entry_probe = "method__entry"
return_probe = "method__return"
read_class = "bpf_usdt_readarg(1, ctx, &clazz);"
read_method = "bpf_usdt_readarg(2, ctx, &method);"
elif args.language == "php":
elif language == "php":
entry_probe = "function__entry"
return_probe = "function__return"
read_class = "bpf_usdt_readarg(4, ctx, &clazz);"
read_method = "bpf_usdt_readarg(1, ctx, &method);"
elif not args.language:
elif not language or language == "none":
if not args.syscalls:
print("Nothing to do; use -S to trace syscalls.")
exit(1)
entry_probe, return_probe, read_class, read_method = ("", "", "", "")
if language:
language = None

program = """
#include <linux/ptrace.h>
Expand Down Expand Up @@ -213,11 +221,11 @@
""".replace("READ_CLASS", read_class) \
.replace("READ_METHOD", read_method) \
.replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \
.replace("DEFINE_NOLANG", "#define NOLANG" if not args.language else "") \
.replace("DEFINE_NOLANG", "#define NOLANG" if not language else "") \
.replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \
.replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "")

if args.language:
if language:
usdt = USDT(pid=args.pid)
usdt.enable_probe_or_bail(entry_probe, "trace_entry")
if args.latency:
Expand Down Expand Up @@ -278,7 +286,7 @@ def clear_data():

exit_signaled = False
print("Tracing calls in process %d (language: %s)... Ctrl-C to quit." %
(args.pid, args.language or "none"))
(args.pid, language or "none"))
while True:
try:
sleep(args.interval or 99999999)
Expand Down
9 changes: 5 additions & 4 deletions tools/ucalls_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ argdist, biotop, fileslower, and others.

For example, to trace method call latency in a Java application:

# ucalls -L -l java $(pidof java)
# ucalls -L $(pidof java)
Tracing calls in process 26877 (language: java)... Ctrl-C to quit.

METHOD # CALLS TIME (us)
Expand Down Expand Up @@ -48,7 +48,7 @@ Detaching kernel probes, please wait...
To print only the top 5 methods and report times in milliseconds (the default
is microseconds):

# ucalls -l python -mT 5 $(pidof python)
# ucalls -mT 5 $(pidof python)
Tracing calls in process 26914 (language: python)... Ctrl-C to quit.

METHOD # CALLS
Expand All @@ -60,7 +60,8 @@ METHOD # CALLS
USAGE message:

# ./ucalls.py -h
usage: ucalls.py [-h] [-l {java,python,ruby,php}] [-T TOP] [-L] [-S] [-v] [-m]
usage: ucalls.py [-h] [-l {java,python,ruby,php,none}] [-T TOP] [-L] [-S] [-v]
[-m]
pid [interval]

Summarize method calls in high-level languages.
Expand All @@ -71,7 +72,7 @@ positional arguments:

optional arguments:
-h, --help show this help message and exit
-l {java,python,ruby,php}, --language {java,python,ruby,php}
-l {java,python,ruby,php,none}, --language {java,python,ruby,php,none}
language to trace (if none, trace syscalls only)
-T TOP, --top TOP number of most frequent/slow calls to print
-L, --latency record method latency from enter to exit (except
Expand Down
32 changes: 21 additions & 11 deletions tools/uflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,24 @@

from __future__ import print_function
import argparse
from bcc import BPF, USDT
from bcc import BPF, USDT, utils
import ctypes as ct
import time
import os

languages = ["java", "python", "ruby", "php"]

examples = """examples:
./uflow java 185 # trace Java method calls in process 185
./uflow ruby 1344 # trace Ruby method calls in process 1344
./uflow -M indexOf java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' python 180 # trace only REPL-defined methods
./uflow -l java 185 # trace Java method calls in process 185
./uflow -l ruby 134 # trace Ruby method calls in process 134
./uflow -M indexOf -l java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' -l python 180 # trace only REPL-defined methods
"""
parser = argparse.ArgumentParser(
description="Trace method execution flow in high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("language", choices=["java", "python", "ruby", "php"],
parser.add_argument("-l", "--language", choices=languages,
help="language to trace")
parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("-M", "--method",
Expand Down Expand Up @@ -113,21 +116,25 @@ def enable_probe(probe_name, func_name, read_class, read_method, is_return):

usdt = USDT(pid=args.pid)

if args.language == "java":
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)

if language == "java":
enable_probe("method__entry", "java_entry",
"bpf_usdt_readarg(2, ctx, &clazz);",
"bpf_usdt_readarg(4, ctx, &method);", is_return=False)
enable_probe("method__return", "java_return",
"bpf_usdt_readarg(2, ctx, &clazz);",
"bpf_usdt_readarg(4, ctx, &method);", is_return=True)
elif args.language == "python":
elif language == "python":
enable_probe("function__entry", "python_entry",
"bpf_usdt_readarg(1, ctx, &clazz);", # filename really
"bpf_usdt_readarg(2, ctx, &method);", is_return=False)
enable_probe("function__return", "python_return",
"bpf_usdt_readarg(1, ctx, &clazz);", # filename really
"bpf_usdt_readarg(2, ctx, &method);", is_return=True)
elif args.language == "ruby":
elif language == "ruby":
enable_probe("method__entry", "ruby_entry",
"bpf_usdt_readarg(1, ctx, &clazz);",
"bpf_usdt_readarg(2, ctx, &method);", is_return=False)
Expand All @@ -140,21 +147,24 @@ def enable_probe(probe_name, func_name, read_class, read_method, is_return):
enable_probe("cmethod__return", "ruby_creturn",
"bpf_usdt_readarg(1, ctx, &clazz);",
"bpf_usdt_readarg(2, ctx, &method);", is_return=True)
elif args.language == "php":
elif language == "php":
enable_probe("function__entry", "php_entry",
"bpf_usdt_readarg(4, ctx, &clazz);",
"bpf_usdt_readarg(1, ctx, &method);", is_return=False)
enable_probe("function__return", "php_return",
"bpf_usdt_readarg(4, ctx, &clazz);",
"bpf_usdt_readarg(1, ctx, &method);", is_return=True)
else:
print("No language detected; use -l to trace a language.")
exit(1)

if args.verbose:
print(usdt.get_text())
print(program)

bpf = BPF(text=program, usdt_contexts=[usdt])
print("Tracing method calls in %s process %d... Ctrl-C to quit." %
(args.language, args.pid))
(language, args.pid))
print("%-3s %-6s %-6s %-8s %s" % ("CPU", "PID", "TID", "TIME(us)", "METHOD"))

class CallEvent(ct.Structure):
Expand Down
Loading

0 comments on commit 4bb6d7f

Please sign in to comment.