#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# capable   Trace security capability checks (cap_capable()).
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: capable [-h] [-v] [-p PID] [-K] [-U] [-x] [--cgroupmap CGROUPMAP]
#                [--mntnsmap MNTNSMAP] [--unique]
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Sep-2016   Brendan Gregg   Created this.

from __future__ import print_function
from os import getpid
from functools import partial
from bcc import BPF
from bcc.containers import filter_by_containers
import errno
import argparse
from time import strftime

# arguments
examples = """examples:
    ./capable             # trace capability checks
    ./capable -v          # verbose: include non-audit checks
    ./capable -p 181      # only trace PID 181
    ./capable -K          # add kernel stacks to trace
    ./capable -U          # add user-space stacks to trace
    ./capable -x          # extra fields: show TID and INSETID columns
    ./capable --unique    # don't repeat stacks for the same pid or cgroup
    ./capable --cgroupmap mappath  # only trace cgroups in this BPF map
    ./capable --mntnsmap mappath   # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
    description="Trace security capability checks",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-v", "--verbose", action="store_true",
    help="include non-audit checks")
# The PID is pasted verbatim into the BPF C source further down, so it is
# parsed as an integer here: a bad value is rejected with a normal argparse
# usage error instead of surfacing as a BPF compilation failure.
parser.add_argument("-p", "--pid", type=int,
    help="trace this PID only")
parser.add_argument("-K", "--kernel-stack", action="store_true",
    help="output kernel stack trace")
parser.add_argument("-U", "--user-stack", action="store_true",
    help="output user stack trace")
parser.add_argument("-x", "--extra", action="store_true",
    help="show extra fields in TID and INSETID columns")
parser.add_argument("--cgroupmap",
    help="trace cgroups in this BPF map only")
parser.add_argument("--mntnsmap",
    help="trace mount namespaces in this BPF map only")
parser.add_argument("--unique", action="store_true",
    help="don't repeat stacks for the same pid or cgroup")
args = parser.parse_args()
# set to a true value to print the final BPF program text before loading it
debug = 0

# capability number -> name. A capability's number is its position in the
# tuple below, so keep the entries in numeric order. The names come from the
# CAP_* defines in include/uapi/linux/capability.h (and will need updating).
# To list "number: name" pairs from a kernel tree for cross-checking:
# awk '/^#define.CAP_.*[0-9]$/ { print "    " $3 ": \"" $2 "\"," }' \
#     include/uapi/linux/capability.h
capabilities = dict(enumerate((
    "CAP_CHOWN",
    "CAP_DAC_OVERRIDE",
    "CAP_DAC_READ_SEARCH",
    "CAP_FOWNER",
    "CAP_FSETID",
    "CAP_KILL",
    "CAP_SETGID",
    "CAP_SETUID",
    "CAP_SETPCAP",
    "CAP_LINUX_IMMUTABLE",
    "CAP_NET_BIND_SERVICE",
    "CAP_NET_BROADCAST",
    "CAP_NET_ADMIN",
    "CAP_NET_RAW",
    "CAP_IPC_LOCK",
    "CAP_IPC_OWNER",
    "CAP_SYS_MODULE",
    "CAP_SYS_RAWIO",
    "CAP_SYS_CHROOT",
    "CAP_SYS_PTRACE",
    "CAP_SYS_PACCT",
    "CAP_SYS_ADMIN",
    "CAP_SYS_BOOT",
    "CAP_SYS_NICE",
    "CAP_SYS_RESOURCE",
    "CAP_SYS_TIME",
    "CAP_SYS_TTY_CONFIG",
    "CAP_MKNOD",
    "CAP_LEASE",
    "CAP_AUDIT_WRITE",
    "CAP_AUDIT_CONTROL",
    "CAP_SETFCAP",
    "CAP_MAC_OVERRIDE",
    "CAP_MAC_ADMIN",
    "CAP_SYSLOG",
    "CAP_WAKE_ALARM",
    "CAP_BLOCK_SUSPEND",
    "CAP_AUDIT_READ",
    "CAP_PERFMON",
    "CAP_BPF",
    "CAP_CHECKPOINT_RESTORE",
)))

class Enum(set):
    """Set of names whose members can also be read as attributes.

    Enum(("A", "B")).A evaluates to the string "A". Asking for a name that
    is not a member raises AttributeError carrying that name.
    """

    def __getattr__(self, name):
        # __getattr__ only runs after normal attribute lookup has failed, so
        # the regular set methods are unaffected.
        if name in self:
            return name
        # name the missing member so the failure can be diagnosed
        raise AttributeError(name)

# Stack trace types
StackType = Enum(("Kernel", "User",))

# define BPF program
#
# C source for the kprobe on cap_capable(). The string is edited as plain text
# further down in this file before it is handed to BPF():
#   FILTER1 / FILTER2 / FILTER3  placeholder tokens, each either swapped for a
#                                C "if (...) { return 0; }" statement or removed
#   UNIQUESET                    swapped for 1 or 0 depending on --unique
#   KERNEL_STACKS / USER_STACKS  #define'd by prepending a line for -K / -U
# container_should_be_filtered() is not defined in this string; presumably it
# comes from the filter_by_containers() output that gets prepended -- verify.
# NOTE(review): struct repeat_t guards its cgroupid field with "#if CGROUPSET"
# while the probe body tests "#if CGROUP_ID_SET"; the two must be given the
# same value for the cgroup-keyed --unique path to compile and work.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/security.h>

struct data_t {
   u32 tgid;
   u32 pid;
   u32 uid;
   int cap;
   int audit;
   int insetid;
   char comm[TASK_COMM_LEN];
#ifdef KERNEL_STACKS
   int kernel_stack_id;
#endif
#ifdef USER_STACKS
   int user_stack_id;
#endif
};

BPF_PERF_OUTPUT(events);

#if UNIQUESET
struct repeat_t {
   int cap;
   u32 tgid;
#if CGROUPSET
   u64 cgroupid;
#endif
#ifdef KERNEL_STACKS
   int kernel_stack_id;
#endif
#ifdef USER_STACKS
   int user_stack_id;
#endif
};
BPF_HASH(seen, struct repeat_t, u64);
#endif

#if defined(USER_STACKS) || defined(KERNEL_STACKS)
BPF_STACK_TRACE(stacks, 2048);
#endif

int kprobe__cap_capable(struct pt_regs *ctx, const struct cred *cred,
    struct user_namespace *targ_ns, int cap, int cap_opt)
{
    u64 __pid_tgid = bpf_get_current_pid_tgid();
    u32 tgid = __pid_tgid >> 32;
    u32 pid = __pid_tgid;
    int audit;
    int insetid;

  #ifdef CAP_OPT_NONE
    audit = (cap_opt & 0b10) == 0;
    insetid = (cap_opt & 0b100) != 0;
  #else
    audit = cap_opt;
    insetid = -1;
  #endif

    FILTER1
    FILTER2
    FILTER3

    if (container_should_be_filtered()) {
        return 0;
    }

    u32 uid = bpf_get_current_uid_gid();

    struct data_t data = {};

    data.tgid = tgid;
    data.pid = pid;
    data.uid = uid;
    data.cap = cap;
    data.audit = audit;
    data.insetid = insetid;
#ifdef KERNEL_STACKS
    data.kernel_stack_id = stacks.get_stackid(ctx, 0);
#endif
#ifdef USER_STACKS
    data.user_stack_id = stacks.get_stackid(ctx, BPF_F_USER_STACK);
#endif

#if UNIQUESET
    struct repeat_t repeat = {0,};
    repeat.cap = cap;
#if CGROUP_ID_SET
    repeat.cgroupid = bpf_get_current_cgroup_id();
#else
    repeat.tgid = tgid;
#endif
#ifdef KERNEL_STACKS
    repeat.kernel_stack_id = data.kernel_stack_id;
#endif
#ifdef USER_STACKS
    repeat.user_stack_id = data.user_stack_id;
#endif
    if (seen.lookup(&repeat) != NULL) {
        return 0;
    }
    u64 zero = 0;
    seen.update(&repeat, &zero);
#endif

    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    events.perf_submit(ctx, &data, sizeof(data));

    return 0;
};
"""
# Fill in the placeholders of the BPF program text, then load it.
#
# -p selects a process, so the filter compares against tgid (the upper half of
# bpf_get_current_pid_tgid()); comparing against pid (the lower half, i.e. the
# thread id) would only match the thread whose id equals the PID.
if args.pid:
    bpf_text = bpf_text.replace('FILTER1',
        'if (tgid != %s) { return 0; }' % args.pid)
if not args.verbose:
    bpf_text = bpf_text.replace('FILTER2', 'if (audit == 0) { return 0; }')
if args.kernel_stack:
    bpf_text = "#define KERNEL_STACKS\n" + bpf_text
if args.user_stack:
    bpf_text = "#define USER_STACKS\n" + bpf_text
# drop the placeholders of the filters that were not requested
bpf_text = bpf_text.replace('FILTER1', '')
bpf_text = bpf_text.replace('FILTER2', '')
# never report capability checks made by this tool's own process
bpf_text = bpf_text.replace('FILTER3',
    'if (tgid == %s) { return 0; }' % getpid())
# With --cgroupmap, --unique de-duplicates per cgroup instead of per pid.
# The struct definition and the probe body spell this switch differently
# (CGROUPSET vs CGROUP_ID_SET), so both spellings get the same value here;
# left unsubstituted, both evaluate to 0 and the cgroup path is never taken.
cgroup_unique = '1' if args.cgroupmap else '0'
bpf_text = bpf_text.replace('CGROUP_ID_SET', cgroup_unique)
bpf_text = bpf_text.replace('CGROUPSET', cgroup_unique)
bpf_text = bpf_text.replace('UNIQUESET', '1' if args.unique else '0')
# prepend the container filter last so the substitutions above only ever
# touch this tool's own program text
bpf_text = filter_by_containers(args) + bpf_text
if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)

# header: the -x layout carries two more columns (TID and INSETID)
if args.extra:
    header_fmt = "%-9s %-6s %-6s %-6s %-16s %-4s %-20s %-6s %s"
    header_cols = ("TIME", "UID", "PID", "TID", "COMM", "CAP", "NAME",
                   "AUDIT", "INSETID")
else:
    header_fmt = "%-9s %-6s %-6s %-16s %-4s %-20s %-6s"
    header_cols = ("TIME", "UID", "PID", "COMM", "CAP", "NAME", "AUDIT")
print(header_fmt % header_cols)

def stack_id_err(stack_id):
    """Return True when stack_id is a negative value other than -EFAULT."""
    if stack_id >= 0:
        return False
    # -EFAULT from get_stackid normally means that kind of stack trace is not
    # available (e.g. asking for a kernel stack while in userspace code), so
    # it is not counted as an error.
    return stack_id != -errno.EFAULT

def print_stack(bpf, stack_id, stack_type, tgid):
    """Print the stack stored under stack_id, one symbolized frame per line.

    bpf        -- object providing get_table("stacks") and sym()
    stack_id   -- value the probe got from get_stackid(); negative on failure
    stack_type -- label used in the "[Missed ... Stack]" message
    tgid       -- forwarded to bpf.sym() for symbol resolution
    """
    if stack_id_err(stack_id):
        print("    [Missed %s Stack]" % stack_type)
        return
    if stack_id < 0:
        # -EFAULT: not an error, but no stack of this type was recorded, so
        # there is no table entry to walk.
        return
    for addr in bpf.get_table("stacks").walk(stack_id):
        sym = bpf.sym(addr, tgid, show_module=True, show_offset=True)
        # decode a bytes result so Python 3 does not print it as b'...'
        if isinstance(sym, bytes):
            sym = sym.decode('utf-8', 'replace')
        print("        %s" % sym)

# process event
def print_event(bpf, cpu, data, size):
    """Perf-buffer callback: print one line per event, plus optional stacks.

    bpf  -- the BPF object that owns the "events" and "stacks" tables
    cpu  -- unused
    data -- raw event, decoded through the "events" table
    size -- unused
    """
    # use the object passed in rather than reaching for a module global
    event = bpf["events"].event(data)

    name = capabilities.get(event.cap, "?")
    comm = event.comm.decode('utf-8', 'replace')
    now = strftime("%H:%M:%S")
    # The probe stores the upper half of bpf_get_current_pid_tgid() in tgid
    # (the process id) and the lower half in pid (the thread id), so tgid goes
    # under the PID column and pid under the TID column.
    if args.extra:
        insetid = str(event.insetid) if event.insetid != -1 else "N/A"
        print("%-9s %-6d %-6d %-6d %-16s %-4d %-20s %-6d %s" % (
            now, event.uid, event.tgid, event.pid, comm,
            event.cap, name, event.audit, insetid))
    else:
        print("%-9s %-6d %-6d %-16s %-4d %-20s %-6d" % (
            now, event.uid, event.tgid, comm,
            event.cap, name, event.audit))
    if args.kernel_stack:
        print_stack(bpf, event.kernel_stack_id, StackType.Kernel, -1)
    if args.user_stack:
        print_stack(bpf, event.user_stack_id, StackType.User, event.tgid)

# loop with callback to print_event
callback = partial(print_event, b)
b["events"].open_perf_buffer(callback)
try:
    # keep polling the perf buffer until interrupted with Ctrl-C
    while True:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    exit()