Skip to content

Commit

Permalink
tools: cgroup filtering in execsnoop/opensnoop
Browse files Browse the repository at this point in the history
Add a new option --cgroupmap in execsnoop and opensnoop to only display
results from processes that belong to one of the cgroups whose id,
returned by bpf_get_current_cgroup_id(), is in a pinned BPF hash map.

Examples of commands:
    # opensnoop --cgroupmap /sys/fs/bpf/test01
    # execsnoop --cgroupmap /sys/fs/bpf/test01

Cgroup ids can be discovered in userspace with the system call
name_to_handle_at(); an example C program doing that is available in
examples/cgroupid/cgroupid.c.

More complete documentation is added in docs/filtering_by_cgroups.md

The documentation is independent from Kubernetes. However, my goal is to
use this feature in Kubernetes: I am preparing to use this in Inspektor
Gadget to select specific Kubernetes pods, depending on a Kubernetes
label selector. Kubernetes pods matching the label selector can come and
go during the execution of the bcc tools; Inspektor Gadget is updating
the BPF hash map used by the bcc tools accordingly.
  • Loading branch information
alban authored and yonghong-song committed Dec 21, 2019
1 parent 0aca2c7 commit b2aa29f
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
cmake-build-debug
debian/**/*.log
obj-x86_64-linux-gnu
examples/cgroupid/cgroupid
65 changes: 65 additions & 0 deletions docs/filtering_by_cgroups.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Demonstrations of filtering by cgroups

Some tools have an option to filter by cgroup by referencing a pinned BPF hash
map managed externally.

Examples of commands:

```
# ./opensnoop --cgroupmap /sys/fs/bpf/test01
# ./execsnoop --cgroupmap /sys/fs/bpf/test01
```

The commands above will only display results from processes that belong to one
of the cgroups whose id, returned by `bpf_get_current_cgroup_id()`, is in the
pinned BPF hash map.

The BPF hash map can be created by:

```
# bpftool map create /sys/fs/bpf/test01 type hash key 8 value 8 entries 128 \
name cgroupset flags 0
```

To get a shell in a new cgroup, you can use:

```
# systemd-run --pty --unit test bash
```

The shell will be running in the cgroup
`/sys/fs/cgroup/unified/system.slice/test.service`.

The cgroup id can be discovered using the `name_to_handle_at()` system call.
The examples/cgroupid directory contains an example program that gets the
cgroup id.

```
# cd examples/cgroupid
# make
# ./cgroupid hex /sys/fs/cgroup/unified/system.slice/test.service
```

or, using Docker:

```
# cd examples/cgroupid
# docker build -t cgroupid .
# docker run --rm --privileged -v /sys/fs/cgroup:/sys/fs/cgroup \
cgroupid cgroupid hex /sys/fs/cgroup/unified/system.slice/test.service
```

This prints the cgroup id as a sequence of hexadecimal bytes in host byte
order, such as `77 16 00 00 01 00 00 00`.

```
# FILE=/sys/fs/bpf/test01
# CGROUPID_HEX="77 16 00 00 01 00 00 00"
# bpftool map update pinned $FILE key hex $CGROUPID_HEX value hex 00 00 00 00 00 00 00 00 any
```

Now that the shell started by systemd-run has its cgroup id in the BPF hash
map, bcc tools will display results from this shell. Cgroups can be added to
and removed from the BPF hash map without restarting the bcc tool.

This feature is useful for integrating bcc tools in external projects.
16 changes: 16 additions & 0 deletions examples/cgroupid/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Builder stage: an Ubuntu 18.04 image with gcc and build-essential installed,
# used only to compile cgroupid.
FROM ubuntu:18.04 as builder
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends \
gcc build-essential && \
apt-get purge --auto-remove && \
apt-get clean

# Add the source and its Makefile at / and build there. The Makefile links the
# binary with -static.
ADD cgroupid.c /cgroupid.c
ADD Makefile /Makefile
RUN make

# Final stage: an Alpine 3.8 image that receives only the built binary, in /bin.
FROM amd64/alpine:3.8 as base
COPY --from=builder /cgroupid /bin
2 changes: 2 additions & 0 deletions examples/cgroupid/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the cgroupid example from cgroupid.c as a statically linked binary,
# with -Wall warnings enabled.
cgroupid: cgroupid.c
gcc -Wall -static -o cgroupid cgroupid.c
101 changes: 101 additions & 0 deletions examples/cgroupid/cgroupid.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdint.h>
#include <ctype.h>
#include <unistd.h>
#include <linux/magic.h>
#include <sys/vfs.h>
#include <string.h>
#include <errno.h>

/*
 * Fallback for <linux/magic.h> headers that do not define
 * CGROUP2_SUPER_MAGIC. The original note on this block reads
 * "67e9c74b8a873408c27ac9a8e4c1d1c8d72c93ff (4.5)" -- presumably the kernel
 * commit and release that introduced the constant (TODO confirm).
 */
#ifndef CGROUP2_SUPER_MAGIC
#define CGROUP2_SUPER_MAGIC 0x63677270
#endif

/*
 * Buffer handed to name_to_handle_at() in place of a struct file_handle:
 * the two header fields, followed by the handle payload, which
 * get_cgroupid() reads back as the cgroup id.
 */
struct cgid_file_handle {
    unsigned int handle_bytes;  /* size in bytes of the payload after the header */
    int handle_type;
    uint64_t cgid;              /* handle payload, read as the cgroup id */
};

uint64_t get_cgroupid(const char *pathname) {
struct statfs fs;
int err;
struct cgid_file_handle *h;
int mount_id;
uint64_t ret;

err = statfs(pathname, &fs);
if (err != 0) {
fprintf (stderr, "statfs on %s failed: %s\n", pathname, strerror(errno));
exit(1);
}

if ((fs.f_type != (typeof(fs.f_type)) CGROUP2_SUPER_MAGIC)) {
fprintf (stderr, "File %s is not on a cgroup2 mount.\n", pathname);
exit(1);
}

h = malloc(sizeof(struct cgid_file_handle));
if (!h) {
fprintf (stderr, "Cannot allocate memory.\n");
exit(1);
}

h->handle_bytes = 8;
err = name_to_handle_at(AT_FDCWD, pathname, (struct file_handle *)h, &mount_id, 0);
if (err != 0) {
fprintf (stderr, "name_to_handle_at failed: %s\n", strerror(errno));
exit(1);
}

if (h->handle_bytes != 8) {
fprintf (stderr, "Unexpected handle size: %d. \n", h->handle_bytes);
exit(1);
}

ret = h->cgid;
free(h);

return ret;
}

/*
 * Print the command-line help for cgroupid to stderr.
 */
void usage(void) {
    fprintf(stderr, "Usage: cgroupid FORMAT FILE\n");
    fprintf(stderr, "Print the cgroup id of a cgroup2 directory.\n");
    /* The example has to use a FORMAT that main() accepts: "hex", not "print-hex". */
    fprintf(stderr, "Example: cgroupid hex /sys/fs/cgroup/unified/system.slice/test.service\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "Format:\n");
    fprintf(stderr, " number print the cgroup id as a number\n");
    fprintf(stderr, " hex print the cgroup id as a hexadecimal, suitable for bpftool\n");
    fprintf(stderr, "\n");
}

/*
 * Entry point: cgroupid FORMAT FILE
 *
 * FORMAT is "number" (print the cgroup id in decimal) or "hex" (print the
 * bytes of the id in memory order as space-separated two-digit hex values).
 * Any other invocation prints the usage text and exits with status 1.
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        usage();
        exit(1);
    }

    /* Decide the output format once, so validation and printing cannot disagree. */
    int as_number = strcmp(argv[1], "number") == 0;
    int as_hex = strcmp(argv[1], "hex") == 0;
    if (!as_number && !as_hex) {
        usage();
        exit(1);
    }

    uint64_t cgroupid = get_cgroupid(argv[2]);

    if (as_number) {
        /*
         * uint64_t is not unsigned long on every ABI, so widen explicitly to
         * make the conversion specifier match its argument everywhere.
         */
        printf("%llu\n", (unsigned long long)cgroupid);
    } else {
        /* Dump the in-memory representation, byte by byte. */
        const unsigned char *bytes = (const unsigned char *)&cgroupid;
        for (size_t i = 0; i < sizeof cgroupid; i++) {
            printf("%02x%s", bytes[i], i + 1 == sizeof cgroupid ? "\n" : " ");
        }
    }
    return 0;
}
25 changes: 25 additions & 0 deletions tools/execsnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
./execsnoop -q # add "quotemarks" around arguments
./execsnoop -n main # only print command lines containing "main"
./execsnoop -l tpkg # only print command where arguments contains "tpkg"
./opensnoop --cgroupmap ./mappath # only trace cgroups in this BPF map
"""
parser = argparse.ArgumentParser(
description="Trace exec() syscalls",
Expand All @@ -47,6 +48,8 @@
help="include timestamp on output")
parser.add_argument("-x", "--fails", action="store_true",
help="include failed exec()s")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
parser.add_argument("-q", "--quote", action="store_true",
help="Add quotemarks (\") around arguments."
)
Expand Down Expand Up @@ -84,6 +87,9 @@
int retval;
};
#if CGROUPSET
BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
#endif
BPF_PERF_OUTPUT(events);
static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
Expand All @@ -108,6 +114,13 @@
const char __user *const __user *__argv,
const char __user *const __user *__envp)
{
#if CGROUPSET
u64 cgroupid = bpf_get_current_cgroup_id();
if (cgroupset.lookup(&cgroupid) == NULL) {
return 0;
}
#endif
// create data here and pass to submit_arg to save stack space (#555)
struct data_t data = {};
struct task_struct *task;
Expand Down Expand Up @@ -141,6 +154,13 @@
int do_ret_sys_execve(struct pt_regs *ctx)
{
#if CGROUPSET
u64 cgroupid = bpf_get_current_cgroup_id();
if (cgroupset.lookup(&cgroupid) == NULL) {
return 0;
}
#endif
struct data_t data = {};
struct task_struct *task;
Expand All @@ -162,6 +182,11 @@
"""

bpf_text = bpf_text.replace("MAXARG", args.max_args)
if args.cgroupmap:
bpf_text = bpf_text.replace('CGROUPSET', '1')
bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
else:
bpf_text = bpf_text.replace('CGROUPSET', '0')
if args.ebpf:
print(bpf_text)
exit()
Expand Down
9 changes: 9 additions & 0 deletions tools/execsnoop_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ python 3345086 4146419 0 /usr/local/bin/python /usr/local/bin/yum in
yum 3345086 4146419 0 /usr/bin/yum install testpkg
rpm 3345452 4146419 0 /bin/rpm -qa testpkg


The --cgroupmap option filters based on a cgroup set. It is meant to be used
with an externally created map.

# ./execsnoop --cgroupmap /sys/fs/bpf/test01

For more details, see docs/filtering_by_cgroups.md


USAGE message:

# ./execsnoop -h
Expand Down
17 changes: 17 additions & 0 deletions tools/opensnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
./opensnoop -n main # only print process names containing "main"
./opensnoop -e # show extended fields
./opensnoop -f O_WRONLY -f O_RDWR # only print calls for writing
./opensnoop --cgroupmap ./mappath # only trace cgroups in this BPF map
"""
parser = argparse.ArgumentParser(
description="Trace open() syscalls",
Expand All @@ -50,6 +51,8 @@
help="trace this PID only")
parser.add_argument("-t", "--tid",
help="trace this TID only")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
parser.add_argument("-u", "--uid",
help="trace this UID only")
parser.add_argument("-d", "--duration",
Expand Down Expand Up @@ -99,6 +102,9 @@
int flags; // EXTENDED_STRUCT_MEMBER
};
#if CGROUPSET
BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
#endif
BPF_HASH(infotmp, u64, struct val_t);
BPF_PERF_OUTPUT(events);
Expand All @@ -113,6 +119,12 @@
PID_TID_FILTER
UID_FILTER
FLAGS_FILTER
#if CGROUPSET
u64 cgroupid = bpf_get_current_cgroup_id();
if (cgroupset.lookup(&cgroupid) == NULL) {
return 0;
}
#endif
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
val.id = id;
val.fname = filename;
Expand Down Expand Up @@ -163,6 +175,11 @@
'if (uid != %s) { return 0; }' % args.uid)
else:
bpf_text = bpf_text.replace('UID_FILTER', '')
if args.cgroupmap:
bpf_text = bpf_text.replace('CGROUPSET', '1')
bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
else:
bpf_text = bpf_text.replace('CGROUPSET', '0')
if args.flag_filter:
bpf_text = bpf_text.replace('FLAGS_FILTER',
'if (!(flags & %d)) { return 0; }' % flag_filter_mask)
Expand Down
8 changes: 8 additions & 0 deletions tools/opensnoop_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ PID COMM FD ERR FLAGS PATH
28051 sshd 7 0 00100001 /var/log/wtmp


The --cgroupmap option filters based on a cgroup set. It is meant to be used
with an externally created map.

# ./opensnoop --cgroupmap /sys/fs/bpf/test01

For more details, see docs/filtering_by_cgroups.md


USAGE message:

# ./opensnoop -h
Expand Down

0 comments on commit b2aa29f

Please sign in to comment.