diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md index 288fb0772ea5..28c2e916e198 100644 --- a/docs/kernel-versions.md +++ b/docs/kernel-versions.md @@ -199,6 +199,8 @@ Alphabetical order Helper | Kernel version | License | Commit | -------|----------------|---------|--------| `BPF_FUNC_bind()` | 4.17 | | [`d74bad4e74ee`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d74bad4e74ee373787a9ae24197c17b7cdc428d5) | +`BPF_FUNC_bpf_per_cpu_ptr()` | 5.10 | | [`eaa6bcb71ef6`](https://github.com/torvalds/linux/commit/eaa6bcb71ef6ed3dc18fc525ee7e293b06b4882b) | +`BPF_FUNC_bpf_this_cpu_ptr()` | 5.10 | | [`63d9b80dcf2c`](https://github.com/torvalds/linux/commit/63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1) | `BPF_FUNC_clone_redirect()` | 4.2 | | [`3896d655f4d4`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=3896d655f4d491c67d669a15f275a39f713410f8) `BPF_FUNC_copy_from_user()` | 5.10 | | [`07be4c4a3e7a`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=07be4c4a3e7a0db148e44b16c5190e753d1c8569) `BPF_FUNC_csum_diff()` | 4.6 | | [`7d672345ed29`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7d672345ed295b1356a5d9f7111da1d1d7d65867) @@ -272,6 +274,7 @@ Helper | Kernel version | License | Commit | `BPF_FUNC_redirect()` | 4.4 | | [`27b29f63058d`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=27b29f63058d26c6c1742f1993338280d5a41dc6) `BPF_FUNC_redirect_map()` | 4.14 | | [`97f91a7cf04f`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=97f91a7cf04ff605845c20948b8a80e54cbd3376) `BPF_FUNC_redirect_neigh()` | 5.10 | | [`b4ab31414970`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=b4ab31414970a7a03a5d55d75083f2c101a30592) +`BPF_FUNC_redirect_peer()` | 5.10 | | [`9aa1206e8f48`](https://github.com/torvalds/linux/commit/9aa1206e8f48222f35a0c809f33b2f4aaa1e2661) 
`BPF_FUNC_reserve_hdr_opt()` | 5.10 | | [`0813a841566f`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=0813a841566f0962a5551be7749b43c45f0022a0) `BPF_FUNC_ringbuf_discard()` | 5.8 | | [`457f44363a88`](https://github.com/torvalds/linux/commit/457f44363a8894135c85b7a9afd2bd8196db24ab) `BPF_FUNC_ringbuf_output()` | 5.8 | | [`457f44363a88`](https://github.com/torvalds/linux/commit/457f44363a8894135c85b7a9afd2bd8196db24ab) diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h index a997ab5a4e43..9554852e3123 100644 --- a/src/cc/compat/linux/virtual_bpf.h +++ b/src/cc/compat/linux/virtual_bpf.h @@ -357,18 +357,36 @@ enum bpf_link_type { #define BPF_F_SLEEPABLE (1U << 4) /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD + * insn[0].imm: map fd + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 +/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_VALUE 2 +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btf id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. 
+ */ +#define BPF_PSEUDO_BTF_ID 3 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -415,6 +433,12 @@ enum { /* Enable memory-mapping BPF map */ BPF_F_MMAPABLE = (1U << 10), + +/* Share perf_event among processes */ + BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), }; /* Flags for BPF_PROG_QUERY. */ @@ -1678,7 +1702,7 @@ union bpf_attr { * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return @@ -2233,7 +2257,7 @@ union bpf_attr { * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The @@ -3654,15 +3678,68 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it - * fills in e.g. MAC addresses based on the L3 information from - * the packet. This helper is supported for IPv4 and IPv6 protocols. 
+ * populates L2 addresses as well, meaning, internally, the helper + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. 
+ * Return + * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types. + * currently only supported for tc BPF program types at the ingress + * hook and for veth device types. The peer device must reside in a + * different network namespace. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. @@ -3821,6 +3898,9 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ + FN(bpf_per_cpu_ptr), \ + FN(bpf_this_cpu_ptr), \ + FN(redirect_peer), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4831,6 +4911,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h index b3d37dfb95b9..0acd2d9e1d6e 100644 --- a/src/cc/export/helpers.h +++ b/src/cc/export/helpers.h @@ -784,16 +784,22 @@ static long (*bpf_d_path)(struct path *path, char *buf, u32 sz) = static long (*bpf_copy_from_user)(void *dst, u32 size, const void *user_ptr) = (void *)BPF_FUNC_copy_from_user; -static long (*bpf_snprintf_btf)(char *str, __u32 str_size, struct btf_ptr *ptr, - 
__u32 btf_ptr_size, __u64 flags) = +static long (*bpf_snprintf_btf)(char *str, u32 str_size, struct btf_ptr *ptr, + u32 btf_ptr_size, u64 flags) = (void *)BPF_FUNC_snprintf_btf; static long (*bpf_seq_printf_btf)(struct seq_file *m, struct btf_ptr *ptr, - __u32 ptr_size, __u64 flags) = + u32 ptr_size, u64 flags) = (void *)BPF_FUNC_seq_printf_btf; -static __u64 (*bpf_skb_cgroup_classid)(struct __sk_buff *skb) = +static u64 (*bpf_skb_cgroup_classid)(struct sk_buff *skb) = (void *)BPF_FUNC_skb_cgroup_classid; -static long (*bpf_redirect_neigh)(__u32 ifindex, __u64 flags) = +static long (*bpf_redirect_neigh)(u32 ifindex, struct bpf_redir_neigh *params, + int plen, u64 flags) = (void *)BPF_FUNC_redirect_neigh; +static void * (*bpf_per_cpu_ptr)(const void *percpu_ptr, u32 cpu) = + (void *)BPF_FUNC_bpf_per_cpu_ptr; +static void * (*bpf_this_cpu_ptr)(const void *percpu_ptr) = + (void *)BPF_FUNC_bpf_this_cpu_ptr; +static long (*bpf_redirect_peer)(u32 ifindex, u64 flags) = (void *)BPF_FUNC_redirect_peer; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/src/cc/libbpf b/src/cc/libbpf index b6dd2f2b7df4..d1fd50d47577 160000 --- a/src/cc/libbpf +++ b/src/cc/libbpf @@ -1 +1 @@ -Subproject commit b6dd2f2b7df4d3bd35d64aaf521d9ad18d766f53 +Subproject commit d1fd50d475779f64805fdc28f912547b9e3dee8a diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c index 21efdf74602d..cfca34d2642b 100644 --- a/src/cc/libbpf.c +++ b/src/cc/libbpf.c @@ -251,6 +251,9 @@ static struct bpf_helper helpers[] = { {"seq_printf_btf", "5.10"}, {"skb_cgroup_classid", "5.10"}, {"redirect_neigh", "5.10"}, + {"per_cpu_ptr", "5.10"}, + {"this_cpu_ptr", "5.10"}, + {"redirect_peer", "5.10"}, }; static uint64_t ptr_to_u64(void *ptr)