Introduce BPF instructions with load-acquire and store-release semantics, as discussed in [1]. The following new flags are defined: BPF_ATOMIC_LOAD 0x10 BPF_ATOMIC_STORE 0x20 BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0) BPF_RELAXED 0x0 BPF_ACQUIRE 0x1 BPF_RELEASE 0x2 BPF_ACQ_REL 0x3 BPF_SEQ_CST 0x4 BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_LOAD_ACQ (0x11). Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_STORE_REL (0x22). Unlike existing atomic operations that only support BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and store-releases also support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire zero-extends the value before writing it to a 32-bit register, just like ARM64 instruction LDARH and friends. As an example, consider the following 64-bit load-acquire BPF instruction: db 10 00 00 11 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0)) opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX imm (0x00000011): BPF_LOAD_ACQ Similarly, a 16-bit BPF store-release: cb 21 00 00 22 00 00 00 store_release((u16 *)(r1 + 0x0), w2) opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX imm (0x00000022): BPF_STORE_REL [1] https://lore.kernel.org/all/20240729183246.4110549-1-yepeilin@xxxxxxxxxx/ Signed-off-by: Peilin Ye <yepeilin@xxxxxxxxxx> --- include/linux/filter.h | 2 + include/uapi/linux/bpf.h | 13 +++++ kernel/bpf/core.c | 41 +++++++++++++- kernel/bpf/disasm.c | 12 +++++ kernel/bpf/verifier.c | 99 ++++++++++++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 13 +++++ 6 files changed, 175 insertions(+), 5 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index a3ea46281595..e36812a5b01f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -364,6 +364,8 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16) + * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2acf9b336371..4a20a125eb46 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -51,6 +51,19 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_ATOMIC_LOAD 0x10 +#define BPF_ATOMIC_STORE 0x20 +#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0) + +#define BPF_RELAXED 0x00 +#define BPF_ACQUIRE 0x01 +#define BPF_RELEASE 0x02 +#define BPF_ACQ_REL 0x03 +#define BPF_SEQ_CST 0x04 + +#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */ +#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index da729cbbaeb9..ab082ab9d535 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); INSN_3(JMP, JSET, K), \ INSN_2(JMP, JA), \ INSN_2(JMP32, JA), \ + /* Atomic operations. */ \ + INSN_3(STX, ATOMIC, B), \ + INSN_3(STX, ATOMIC, H), \ + INSN_3(STX, ATOMIC, W), \ + INSN_3(STX, ATOMIC, DW), \ /* Store instructions. */ \ /* Register based. */ \ INSN_3(STX, MEM, B), \ INSN_3(STX, MEM, H), \ INSN_3(STX, MEM, W), \ INSN_3(STX, MEM, DW), \ - INSN_3(STX, ATOMIC, W), \ - INSN_3(STX, ATOMIC, DW), \ /* Immediate based. */ \ INSN_3(ST, MEM, B), \ INSN_3(ST, MEM, H), \ @@ -2169,6 +2172,8 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) STX_ATOMIC_DW: STX_ATOMIC_W: + STX_ATOMIC_H: + STX_ATOMIC_B: switch (IMM) { ATOMIC_ALU_OP(BPF_ADD, add) ATOMIC_ALU_OP(BPF_AND, and) @@ -2196,6 +2201,38 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) (atomic64_t *)(unsigned long) (DST + insn->off), (u64) BPF_R0, (u64) SRC); break; + case BPF_LOAD_ACQ: + switch (BPF_SIZE(insn->code)) { +#define LOAD_ACQUIRE(SIZEOP, SIZE) \ + case BPF_##SIZEOP: \ + DST = (SIZE)smp_load_acquire( \ + (SIZE *)(unsigned long)(SRC + insn->off)); \ + break; + LOAD_ACQUIRE(B, u8) + LOAD_ACQUIRE(H, u16) + LOAD_ACQUIRE(W, u32) + LOAD_ACQUIRE(DW, u64) +#undef LOAD_ACQUIRE + default: + goto default_label; + } + break; + case BPF_STORE_REL: + switch (BPF_SIZE(insn->code)) { +#define STORE_RELEASE(SIZEOP, SIZE) \ + case BPF_##SIZEOP: \ + smp_store_release( \ + (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \ + break; + STORE_RELEASE(B, u8) + STORE_RELEASE(H, u16) + STORE_RELEASE(W, u32) + STORE_RELEASE(DW, u64) +#undef STORE_RELEASE + default: + goto default_label; + } + break; default: goto default_label; diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 309c4aa1b026..974d172d6735 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -267,6 +267,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, BPF_SIZE(insn->code) == BPF_DW ? "64" : "", bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_LOAD_ACQ) { + verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n", + insn->code, insn->dst_reg, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->src_reg, insn->off); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_STORE_REL) { + verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), r%d)\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, insn->src_reg); } else { verbose(cbs->private_data, "BUG_%02x\n", insn->code); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2b3caa7549af..e44abe22aac9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -579,6 +579,13 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } +static bool is_atomic_load_insn(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_STX && + BPF_MODE(insn->code) == BPF_ATOMIC && + BPF_ATOMIC_TYPE(insn->imm) == BPF_ATOMIC_LOAD; +} + static int __get_spi(s32 off) { return (-off - 1) / BPF_REG_SIZE; @@ -3481,7 +3488,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, } if (class == BPF_STX) { - /* BPF_STX (including atomic variants) has multiple source + /* BPF_STX (including atomic variants) has one or more source * operands, one of which is a ptr. Check whether the caller is * asking about it. */ @@ -4095,7 +4102,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * dreg still needs precision before this insn */ } - } else if (class == BPF_LDX) { + } else if (class == BPF_LDX || is_atomic_load_insn(insn)) { if (!bt_is_reg_set(bt, dreg)) return 0; bt_clear_reg(bt, dreg); @@ -7625,6 +7632,86 @@ static int check_atomic_rmw(struct bpf_verifier_env *env, int insn_idx, return 0; } +static int check_atomic_load(struct bpf_verifier_env *env, int insn_idx, + struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = cur_regs(env); + int err; + + err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + if (err) + return err; + + if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) { + verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n", + insn->src_reg, + reg_type_str(env, reg_state(env, insn->src_reg)->type)); + return -EACCES; + } + + if (is_arena_reg(env, insn->src_reg)) { + err = save_aux_ptr_type(env, PTR_TO_ARENA, false); + if (err) + return err; + } + + /* Check whether we can read the memory. */ + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, + true, false); + if (err) + return err; + + err = reg_bounds_sanity_check(env, ®s[insn->dst_reg], "atomic_load"); + if (err) + return err; + return 0; +} + +static int check_atomic_store(struct bpf_verifier_env *env, int insn_idx, + struct bpf_insn *insn) +{ + int err; + + err = check_reg_arg(env, insn->src_reg, SRC_OP); + if (err) + return err; + + err = check_reg_arg(env, insn->dst_reg, SRC_OP); + if (err) + return err; + + if (is_pointer_value(env, insn->src_reg)) { + verbose(env, "R%d leaks addr into mem\n", insn->src_reg); + return -EACCES; + } + + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", + insn->dst_reg, + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + return -EACCES; + } + + if (is_arena_reg(env, insn->dst_reg)) { + err = save_aux_ptr_type(env, PTR_TO_ARENA, false); + if (err) + return err; + } + + /* Check whether we can write into the memory. */ + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg, + true, false); + if (err) + return err; + return 0; +} + static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) { switch (insn->imm) { @@ -7639,6 +7726,10 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i case BPF_XCHG: case BPF_CMPXCHG: return check_atomic_rmw(env, insn_idx, insn); + case BPF_LOAD_ACQ: + return check_atomic_load(env, insn_idx, insn); + case BPF_STORE_REL: + return check_atomic_store(env, insn_idx, insn); default: verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); return -EINVAL; @@ -20420,7 +20511,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || + } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) || + insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) || + insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) && env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) { insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2acf9b336371..4a20a125eb46 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -51,6 +51,19 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_ATOMIC_LOAD 0x10 +#define BPF_ATOMIC_STORE 0x20 +#define BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0) + +#define BPF_RELAXED 0x00 +#define BPF_ACQUIRE 0x01 +#define BPF_RELEASE 0x02 +#define BPF_ACQ_REL 0x03 +#define BPF_SEQ_CST 0x04 + +#define BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE) /* load-acquire */ +#define BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE) /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; -- 2.48.1.262.g85cc9f2d1e-goog