Introduce BPF instructions with load-acquire and store-release semantics, as discussed in [1]. The following new flags are defined: BPF_ATOMIC_LOAD 0x10 BPF_ATOMIC_STORE 0x20 BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0) BPF_RELAXED 0x0 BPF_ACQUIRE 0x1 BPF_RELEASE 0x2 BPF_ACQ_REL 0x3 BPF_SEQ_CST 0x4 BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_LOAD_ACQ (0x11). Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_STORE_REL (0x22). Unlike existing atomic operations that only support BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and store-releases also support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire zero-extends the value before writing it to a 32-bit register, just like ARM64 instruction LDARH and friends. As an example, consider the following 64-bit load-acquire BPF instruction: db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0)) opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX imm (0x00000011): BPF_LOAD_ACQ Similarly, a 16-bit BPF store-release: cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2) opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX imm (0x00000022): BPF_STORE_REL In arch/{arm64,s390,x86}/net/bpf_jit_comp.c, have bpf_jit_supports_insn(..., /*in_arena=*/true) return false for the new instructions, until the corresponding JIT compiler supports them. [1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@xxxxxxxxxx/ Acked-by: Eduard Zingerman <eddyz87@xxxxxxxxx> Signed-off-by: Peilin Ye <yepeilin@xxxxxxxxxx> --- arch/arm64/net/bpf_jit_comp.c | 4 +++ arch/s390/net/bpf_jit_comp.c | 14 +++++--- arch/x86/net/bpf_jit_comp.c | 4 +++ include/linux/bpf.h | 11 ++++++ include/linux/filter.h | 2 ++ include/uapi/linux/bpf.h | 13 +++++++ kernel/bpf/core.c | 63 ++++++++++++++++++++++++++++++---- kernel/bpf/disasm.c | 12 +++++++ kernel/bpf/verifier.c | 45 ++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 13 +++++++ 10 files changed, 168 insertions(+), 13 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 8446848edddb..8c3b47d9e441 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2667,8 +2667,12 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) if (!in_arena) return true; switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) return false; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9d440a0b729e..0776dfde2dba 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2919,10 +2919,16 @@ bool bpf_jit_supports_arena(void) bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) { - /* - * Currently the verifier uses this function only to check which - * atomic stores to arena are supported, and they all are. - */ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; + } return true; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a43fc5af973d..f0c31c940fb8 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3771,8 +3771,12 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) if (!in_arena) return true; switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; if (insn->imm == (BPF_AND | BPF_FETCH) || insn->imm == (BPF_OR | BPF_FETCH) || insn->imm == (BPF_XOR | BPF_FETCH)) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f3f50e29d639..96c936fd125f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -990,6 +990,17 @@ static inline bool bpf_pseudo_func(const struct bpf_insn *insn) return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } +/* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an + * atomic load or store, and false if it is a read-modify-write instruction. + */ +static inline bool +bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn) +{ + const s32 type = BPF_ATOMIC_TYPE(atomic_insn->imm); + + return type == BPF_ATOMIC_LOAD || type == BPF_ATOMIC_STORE; +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); diff --git a/include/linux/filter.h b/include/linux/filter.h index a3ea46281595..e36812a5b01f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -364,6 +364,8 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16) + * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fff6cdb8d11a..e78306e6e2be 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -51,6 +51,19 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_ATOMIC_LOAD 0x10 +#define BPF_ATOMIC_STORE 0x20 +#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0) + +#define BPF_RELAXED 0x00 +#define BPF_ACQUIRE 0x01 +#define BPF_RELEASE 0x02 +#define BPF_ACQ_REL 0x03 +#define BPF_SEQ_CST 0x04 + +#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */ +#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index da729cbbaeb9..3f3127479a93 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); INSN_3(JMP, JSET, K), \ INSN_2(JMP, JA), \ INSN_2(JMP32, JA), \ + /* Atomic operations. */ \ + INSN_3(STX, ATOMIC, B), \ + INSN_3(STX, ATOMIC, H), \ + INSN_3(STX, ATOMIC, W), \ + INSN_3(STX, ATOMIC, DW), \ /* Store instructions. */ \ /* Register based. */ \ INSN_3(STX, MEM, B), \ INSN_3(STX, MEM, H), \ INSN_3(STX, MEM, W), \ INSN_3(STX, MEM, DW), \ - INSN_3(STX, ATOMIC, W), \ - INSN_3(STX, ATOMIC, DW), \ /* Immediate based. */ \ INSN_3(ST, MEM, B), \ INSN_3(ST, MEM, H), \ @@ -2152,24 +2155,33 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) if (BPF_SIZE(insn->code) == BPF_W) \ atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \ (DST + insn->off)); \ - else \ + else if (BPF_SIZE(insn->code) == BPF_DW) \ atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \ (DST + insn->off)); \ + else \ + goto default_label; \ break; \ case BOP | BPF_FETCH: \ if (BPF_SIZE(insn->code) == BPF_W) \ SRC = (u32) atomic_fetch_##KOP( \ (u32) SRC, \ (atomic_t *)(unsigned long) (DST + insn->off)); \ - else \ + else if (BPF_SIZE(insn->code) == BPF_DW) \ SRC = (u64) atomic64_fetch_##KOP( \ (u64) SRC, \ (atomic64_t *)(unsigned long) (DST + insn->off)); \ + else \ + goto default_label; \ break; STX_ATOMIC_DW: STX_ATOMIC_W: + STX_ATOMIC_H: + STX_ATOMIC_B: switch (IMM) { + /* Atomic read-modify-write instructions support only W and DW + * size modifiers. + */ ATOMIC_ALU_OP(BPF_ADD, add) ATOMIC_ALU_OP(BPF_AND, and) ATOMIC_ALU_OP(BPF_OR, or) @@ -2181,20 +2193,59 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) SRC = (u32) atomic_xchg( (atomic_t *)(unsigned long) (DST + insn->off), (u32) SRC); - else + else if (BPF_SIZE(insn->code) == BPF_DW) SRC = (u64) atomic64_xchg( (atomic64_t *)(unsigned long) (DST + insn->off), (u64) SRC); + else + goto default_label; break; case BPF_CMPXCHG: if (BPF_SIZE(insn->code) == BPF_W) BPF_R0 = (u32) atomic_cmpxchg( (atomic_t *)(unsigned long) (DST + insn->off), (u32) BPF_R0, (u32) SRC); - else + else if (BPF_SIZE(insn->code) == BPF_DW) BPF_R0 = (u64) atomic64_cmpxchg( (atomic64_t *)(unsigned long) (DST + insn->off), (u64) BPF_R0, (u64) SRC); + else + goto default_label; + break; + /* Atomic load and store instructions support all size + * modifiers. + */ + case BPF_LOAD_ACQ: + switch (BPF_SIZE(insn->code)) { +#define LOAD_ACQUIRE(SIZEOP, SIZE) \ + case BPF_##SIZEOP: \ + DST = (SIZE)smp_load_acquire( \ + (SIZE *)(unsigned long)(SRC + insn->off)); \ + break; + LOAD_ACQUIRE(B, u8) + LOAD_ACQUIRE(H, u16) + LOAD_ACQUIRE(W, u32) + LOAD_ACQUIRE(DW, u64) +#undef LOAD_ACQUIRE + default: + goto default_label; + } + break; + case BPF_STORE_REL: + switch (BPF_SIZE(insn->code)) { +#define STORE_RELEASE(SIZEOP, SIZE) \ + case BPF_##SIZEOP: \ + smp_store_release( \ + (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \ + break; + STORE_RELEASE(B, u8) + STORE_RELEASE(H, u16) + STORE_RELEASE(W, u32) + STORE_RELEASE(DW, u64) +#undef STORE_RELEASE + default: + goto default_label; + } break; default: diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 309c4aa1b026..974d172d6735 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -267,6 +267,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, BPF_SIZE(insn->code) == BPF_DW ? "64" : "", bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_LOAD_ACQ) { + verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n", + insn->code, insn->dst_reg, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->src_reg, insn->off); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_STORE_REL) { + verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), r%d)\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, insn->src_reg); } else { verbose(cbs->private_data, "BUG_%02x\n", insn->code); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 82a5a4acf576..7ebc224bf9cb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -579,6 +579,13 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } +static bool is_atomic_load_insn(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_STX && + BPF_MODE(insn->code) == BPF_ATOMIC && + BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD; +} + static int __get_spi(s32 off) { return (-off - 1) / BPF_REG_SIZE; @@ -3481,7 +3488,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, } if (class == BPF_STX) { - /* BPF_STX (including atomic variants) has multiple source + /* BPF_STX (including atomic variants) has one or more source * operands, one of which is a ptr. Check whether the caller is * asking about it. */ @@ -4095,7 +4102,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * dreg still needs precision before this insn */ } - } else if (class == BPF_LDX) { + } else if (class == BPF_LDX || is_atomic_load_insn(insn)) { if (!bt_is_reg_set(bt, dreg)) return 0; bt_clear_reg(bt, dreg); @@ -7686,6 +7693,32 @@ static int check_atomic_rmw(struct bpf_verifier_env *env, return 0; } +static int check_atomic_load(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) { + verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n", + insn->src_reg, + reg_type_str(env, reg_state(env, insn->src_reg)->type)); + return -EACCES; + } + + return check_load_mem(env, insn, true, false, false, "atomic_load"); +} + +static int check_atomic_store(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", + insn->dst_reg, + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + return -EACCES; + } + + return check_store_reg(env, insn, true); +} + static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn) { switch (insn->imm) { @@ -7700,6 +7733,10 @@ static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn) case BPF_XCHG: case BPF_CMPXCHG: return check_atomic_rmw(env, insn); + case BPF_LOAD_ACQ: + return check_atomic_load(env, insn); + case BPF_STORE_REL: + return check_atomic_store(env, insn); default: verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); @@ -20445,7 +20482,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || + } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) || + insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) || + insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) && env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) { insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2acf9b336371..4a20a125eb46 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -51,6 +51,19 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_ATOMIC_LOAD 0x10 +#define BPF_ATOMIC_STORE 0x20 +#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0) + +#define BPF_RELAXED 0x00 +#define BPF_ACQUIRE 0x01 +#define BPF_RELEASE 0x02 +#define BPF_ACQ_REL 0x03 +#define BPF_SEQ_CST 0x04 + +#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */ +#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; -- 2.48.1.502.g6dc24dfdaf-goog