Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-04-26

We've added 12 non-merge commits during the last 22 day(s) which contain
a total of 14 files changed, 168 insertions(+), 72 deletions(-).

The main changes are:

1) Fix BPF_PROBE_MEM in verifier and JIT to skip loads from vsyscall page,
   from Puranjay Mohan.

2) Fix a crash in XDP with devmap broadcast redirect when the latter map
   is in the process of being torn down, from Toke Høiland-Jørgensen.

3) Fix arm64 and riscv64 BPF JITs to properly clear start time for BPF
   program runtime stats, from Xu Kuohai.

4) Fix a sockmap KCSAN-reported data race in sk_psock_skb_ingress_enqueue,
   from Jason Xing.

5) Fix BPF verifier error message in resolve_pseudo_ldimm64,
   from Anton Protopopov.

6) Fix missing DEBUG_INFO_BTF_MODULES Kconfig menu item,
   from Andrii Nakryiko.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Test PROBE_MEM of VSYSCALL_ADDR on x86-64
  bpf, x86: Fix PROBE_MEM runtime load check
  bpf: verifier: prevent userspace memory access
  xdp: use flags field to disambiguate broadcast redirect
  arm32, bpf: Reimplement sign-extension mov instruction
  riscv, bpf: Fix incorrect runtime stats
  bpf, arm64: Fix incorrect runtime stats
  bpf: Fix a verifier verbose message
  bpf, skmsg: Fix NULL pointer dereference in sk_psock_skb_ingress_enqueue
  MAINTAINERS: bpf: Add Lehui and Puranjay as riscv64 reviewers
  MAINTAINERS: Update email address for Puranjay Mohan
  bpf, kconfig: Fix DEBUG_INFO_BTF_MODULES Kconfig definition
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
kuba-moo committed Apr 27, 2024
2 parents 6a30653 + a86538a commit b2ff42c
Showing 14 changed files with 168 additions and 72 deletions.
1 change: 1 addition & 0 deletions .mailmap
@@ -512,6 +512,7 @@ Praveen BP <[email protected]>
Pradeep Kumar Chitrapu <[email protected]> <[email protected]>
Prasad Sodagudi <[email protected]> <[email protected]>
Punit Agrawal <[email protected]> <[email protected]>
Puranjay Mohan <[email protected]> <[email protected]>
Qais Yousef <[email protected]> <[email protected]>
Qais Yousef <[email protected]> <[email protected]>
Quentin Monnet <[email protected]> <[email protected]>
8 changes: 5 additions & 3 deletions MAINTAINERS
@@ -553,7 +553,7 @@ F: Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml
F: drivers/input/misc/adxl34x.c

ADXL355 THREE-AXIS DIGITAL ACCELEROMETER DRIVER
M: Puranjay Mohan <[email protected]>
M: Puranjay Mohan <[email protected]>
L: [email protected]
S: Supported
F: Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
@@ -3714,7 +3714,7 @@ F: drivers/iio/imu/bmi323/

BPF JIT for ARM
M: Russell King <[email protected]>
M: Puranjay Mohan <[email protected]>
M: Puranjay Mohan <[email protected]>
L: [email protected]
S: Maintained
F: arch/arm/net/
@@ -3764,6 +3764,8 @@ X: arch/riscv/net/bpf_jit_comp64.c

BPF JIT for RISC-V (64-bit)
M: Björn Töpel <[email protected]>
R: Pu Lehui <[email protected]>
R: Puranjay Mohan <[email protected]>
L: [email protected]
S: Maintained
F: arch/riscv/net/
@@ -21925,7 +21927,7 @@ F: include/linux/soc/ti/ti_sci_inta_msi.h
F: include/linux/soc/ti/ti_sci_protocol.h

TEXAS INSTRUMENTS' TMP117 TEMPERATURE SENSOR DRIVER
M: Puranjay Mohan <[email protected]>
M: Puranjay Mohan <[email protected]>
L: [email protected]
S: Supported
F: Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml
56 changes: 43 additions & 13 deletions arch/arm/net/bpf_jit_32.c
@@ -871,16 +871,11 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}

/* dst = src (4 bytes)*/
static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
struct jit_ctx *ctx) {
static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
s8 rt;

rt = arm_bpf_get_reg32(src, tmp[0], ctx);
if (off && off != 32) {
emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
}
arm_bpf_put_reg32(dst, rt, ctx);
}

@@ -889,15 +884,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
const s8 src[],
struct jit_ctx *ctx) {
if (!is64) {
emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
emit_a32_mov_r(dst_lo, src_lo, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
emit_a32_mov_r(dst_lo, src_lo, ctx);
emit_a32_mov_r(dst_hi, src_hi, ctx);
} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
const u8 *tmp = bpf2a32[TMP_REG_1];

@@ -917,17 +912,52 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
const s8 *rt;
s8 rs;
s8 rd;

rt = arm_bpf_get_reg64(dst, tmp, ctx);
if (is_stacked(dst_lo))
rd = tmp[1];
else
rd = dst_lo;
rs = arm_bpf_get_reg32(src_lo, rd, ctx);
/* rs may be one of src[1], dst[1], or tmp[1] */

/* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign
* extension only happens in the upper 64 bits.
*/
if (off != 32) {
/* Sign extend rs into rd */
emit(ARM_LSL_I(rd, rs, 32 - off), ctx);
emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
} else {
rd = rs;
}

/* Write rd to dst_lo
*
* Optimization:
* Assume:
* 1. dst == src and stacked.
* 2. off == 32
*
* In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32.
* So, we don't need to write rd back to dst_lo as they have the same value.
* This saves us one str instruction.
*/
if (dst_lo != src_lo || off != 32)
arm_bpf_put_reg32(dst_lo, rd, ctx);

emit_a32_mov_r(dst_lo, src_lo, off, ctx);
if (!is64) {
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else {
emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
if (is_stacked(dst_hi)) {
emit(ARM_ASR_I(tmp[0], rd, 31), ctx);
arm_bpf_put_reg32(dst_hi, tmp[0], ctx);
} else {
emit(ARM_ASR_I(dst_hi, rd, 31), ctx);
}
}
}

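For reference, the ARM_LSL_I/ARM_ASR_I pair emitted above for off != 32 is the usual shift trick for sign-extending the low off bits of a 32-bit register, and the is64 path then fills the upper word with ARM_ASR_I(..., 31), a copy of the sign bit. A minimal C sketch of the same arithmetic (illustration only, not part of this commit):

#include <assert.h>
#include <stdint.h>

/* Sign-extend the low off bits of x: LSL then ASR by (32 - off). */
static int32_t sext32(uint32_t x, unsigned int off)
{
	unsigned int sh = 32 - off;

	return (int32_t)(x << sh) >> sh;
}

int main(void)
{
	assert(sext32(0x000000f0u, 8) == -16);    /* 0xf0 as s8 */
	assert(sext32(0x0000ff80u, 16) == -128);  /* 0xff80 as s16 */
	return 0;
}
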
6 changes: 3 additions & 3 deletions arch/arm64/net/bpf_jit_comp.c
@@ -1844,15 +1844,15 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,

emit_call(enter_prog, ctx);

/* save return value to callee saved register x20 */
emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);

/* if (__bpf_prog_enter(prog) == 0)
* goto skip_exec_of_prog;
*/
branch = ctx->image + ctx->idx;
emit(A64_NOP, ctx);

/* save return value to callee saved register x20 */
emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);

emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
if (!p->jited)
emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
6 changes: 3 additions & 3 deletions arch/riscv/net/bpf_jit_comp64.c
@@ -722,16 +722,16 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
if (ret)
return ret;

/* store prog start time */
emit_mv(RV_REG_S1, RV_REG_A0, ctx);

/* if (__bpf_prog_enter(prog) == 0)
* goto skip_exec_of_prog;
*/
branch_off = ctx->ninsns;
/* nop reserved for conditional jump */
emit(rv_nop(), ctx);

/* store prog start time */
emit_mv(RV_REG_S1, RV_REG_A0, ctx);

/* arg1: &args_off */
emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
if (!p->jited)
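
Both the arm64 hunk above and this riscv64 hunk make the same correction: the start time returned by __bpf_prog_enter() must land in a callee-saved register before the skip branch, because __bpf_prog_exit() runs even when program execution is skipped and uses that value for the runtime stats. A rough C sketch of the generated control flow (argument lists simplified, not code from this commit):

	u64 start = __bpf_prog_enter(prog);	/* 0 means: skip the program */

	/* start is captured here, before the branch, by the moved instruction */
	if (start)
		prog->bpf_func(args, prog->insnsi);

	__bpf_prog_exit(prog, start);		/* start feeds the runtime stats */

With the old ordering, the skip path jumped straight to the exit call while the callee-saved register still held a stale value.
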
63 changes: 31 additions & 32 deletions arch/x86/net/bpf_jit_comp.c
@@ -1807,36 +1807,41 @@ st: if (is_imm8(insn->off))
if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
/* Conservatively check that src_reg + insn->off is a kernel address:
* src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
* src_reg is used as scratch for src_reg += insn->off and restored
* after emit_ldx if necessary
* src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
* and
* src_reg + insn->off < VSYSCALL_ADDR
*/

u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
u8 *end_of_jmp;

/* At end of these emitted checks, insn->off will have been added
* to src_reg, so no need to do relative load with insn->off offset
*/
insn_off = 0;
/* movabsq r10, VSYSCALL_ADDR */
emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
(u32)(long)VSYSCALL_ADDR);

/* movabsq r11, limit */
EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
EMIT((u32)limit, 4);
EMIT(limit >> 32, 4);
/* mov src_reg, r11 */
EMIT_mov(AUX_REG, src_reg);

if (insn->off) {
/* add src_reg, insn->off */
maybe_emit_1mod(&prog, src_reg, true);
EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
/* add r11, insn->off */
maybe_emit_1mod(&prog, AUX_REG, true);
EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
}

/* cmp src_reg, r11 */
maybe_emit_mod(&prog, src_reg, AUX_REG, true);
EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
/* sub r11, r10 */
maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));

/* movabsq r10, limit */
emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
(u32)(long)limit);

/* cmp r10, r11 */
maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));

/* if unsigned '>=', goto load */
EMIT2(X86_JAE, 0);
/* if unsigned '>', goto load */
EMIT2(X86_JA, 0);
end_of_jmp = prog;

/* xor dst_reg, dst_reg */
@@ -1862,18 +1867,6 @@ st: if (is_imm8(insn->off))
/* populate jmp_offset for JMP above */
start_of_ldx[-1] = prog - start_of_ldx;

if (insn->off && src_reg != dst_reg) {
/* sub src_reg, insn->off
* Restore src_reg after "add src_reg, insn->off" in prev
* if statement. But if src_reg == dst_reg, emit_ldx
* above already clobbered src_reg, so no need to restore.
* If add src_reg, insn->off was unnecessary, no need to
* restore either.
*/
maybe_emit_1mod(&prog, src_reg, true);
EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
}

if (!bpf_prog->aux->extable)
break;

@@ -3473,3 +3466,9 @@ bool bpf_jit_supports_ptr_xchg(void)
{
return true;
}

/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
u64 bpf_arch_uaddress_limit(void)
{
return 0;
}
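
The rewritten check folds the two bounds (src_reg + insn->off above TASK_SIZE_MAX + PAGE_SIZE and below VSYSCALL_ADDR) into one subtraction plus a single unsigned compare against a precomputed limit, and it works on r11 so src_reg no longer has to be modified and restored. A C sketch of the arithmetic (illustration only, not code from this commit):

#include <stdbool.h>
#include <stdint.h>

/* True iff TASK_SIZE_MAX + PAGE_SIZE < addr < VSYSCALL_ADDR, relying on the
 * same unsigned wrap-around as the sub/cmp/ja sequence emitted above.
 */
static bool kernel_addr_in_range(uint64_t addr, uint64_t task_size_max,
				 uint64_t page_size, uint64_t vsyscall_addr)
{
	uint64_t limit = task_size_max + page_size - vsyscall_addr;

	return (addr - vsyscall_addr) > limit;
}

Userspace addresses and the vsyscall page both fail the compare, so the JIT falls through to the xor dst_reg, dst_reg path and never performs the load.
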
1 change: 1 addition & 0 deletions include/linux/filter.h
@@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void);
bool bpf_jit_supports_ptr_xchg(void);
bool bpf_jit_supports_arena(void);
u64 bpf_arch_uaddress_limit(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);

2 changes: 2 additions & 0 deletions include/linux/skmsg.h
@@ -461,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)

static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
read_lock_bh(&sk->sk_callback_lock);
if (psock->saved_data_ready)
psock->saved_data_ready(sk);
else
sk->sk_data_ready(sk);
read_unlock_bh(&sk->sk_callback_lock);
}

static inline void psock_set_prog(struct bpf_prog **pprog,
9 changes: 9 additions & 0 deletions kernel/bpf/core.c
@@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void)
return false;
}

u64 __weak bpf_arch_uaddress_limit(void)
{
#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
return TASK_SIZE;
#else
return 0;
#endif
}

/* Return TRUE if the JIT backend satisfies the following two conditions:
* 1) JIT backend supports atomic_xchg() on pointer-sized words.
* 2) Under the specific arch, the implementation of xchg() is the same
33 changes: 31 additions & 2 deletions kernel/bpf/verifier.c
@@ -18289,8 +18289,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
f = fdget(fd);
map = __bpf_map_get(f);
if (IS_ERR(map)) {
verbose(env, "fd %d is not pointing to valid bpf_map\n",
insn[0].imm);
verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
return PTR_ERR(map);
}

@@ -19676,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto next_insn;
}

/* Make it impossible to de-reference a userspace address */
if (BPF_CLASS(insn->code) == BPF_LDX &&
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
struct bpf_insn *patch = &insn_buf[0];
u64 uaddress_limit = bpf_arch_uaddress_limit();

if (!uaddress_limit)
goto next_insn;

*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
if (insn->off)
*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
*patch++ = *insn;
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);

cnt = patch - insn_buf;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;

delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
goto next_insn;
}

/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
if (BPF_CLASS(insn->code) == BPF_LD &&
(BPF_MODE(insn->code) == BPF_ABS ||
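
On architectures where bpf_arch_uaddress_limit() returns a non-zero value, the sequence patched in above compares only the upper 32 bits of src_reg + insn->off (hence the BPF_RSH by 32) and turns a load from a userspace address into a zeroed destination register instead of a fault. Its runtime effect, sketched in C (illustration only, not code from this commit):

#include <stdint.h>

static uint64_t probe_mem_load(uint64_t src, int16_t off, uint64_t uaddress_limit)
{
	uint64_t addr = src + off;

	if ((addr >> 32) <= (uaddress_limit >> 32))
		return 0;			/* user address: reads back as zero */

	return *(const uint64_t *)addr;		/* kernel address: do the real load */
}
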
5 changes: 3 additions & 2 deletions lib/Kconfig.debug
@@ -375,7 +375,7 @@ config DEBUG_INFO_SPLIT
Incompatible with older versions of ccache.

config DEBUG_INFO_BTF
bool "Generate BTF typeinfo"
bool "Generate BTF type information"
depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
depends on BPF_SYSCALL
@@ -408,7 +408,8 @@ config PAHOLE_HAS_LANG_EXCLUDE
using DEBUG_INFO_BTF_MODULES.

config DEBUG_INFO_BTF_MODULES
def_bool y
bool "Generate BTF type information for kernel modules"
default y
depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
help
Generate compact split BTF type information for kernel modules.
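
Now that the option carries a prompt, DEBUG_INFO_BTF_MODULES shows up as its own menu item and can be toggled independently of DEBUG_INFO_BTF. For example, a configuration that keeps vmlinux BTF but skips split BTF for modules would contain (assuming the remaining dependencies are satisfied):

CONFIG_DEBUG_INFO_BTF=y
# CONFIG_DEBUG_INFO_BTF_MODULES is not set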