On Mon, 2023-10-30 at 22:03 -0700, Andrii Nakryiko wrote:

All makes sense, a few nitpicks below.

Acked-by: Eduard Zingerman <eddyz87@xxxxxxxxx>

[...]

> +/* instruction history flags, used in bpf_insn_hist_entry.flags field */
> +enum {
> +        /* instruction references stack slot through PTR_TO_STACK register;
> +         * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8)
> +         * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512,
> +         * 8 bytes per slot, so slot index (spi) is [0, 63])
> +         */
> +        INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */
> +
> +        INSN_F_SPI_MASK = 0x3f, /* 6 bits */
> +        INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
> +
> +        INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
> +};
> +
> +static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
> +static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
> +
>  struct bpf_insn_hist_entry {
> -        u32 prev_idx;
>          u32 idx;
> +        /* insn idx can't be bigger than 1 million */
> +        u32 prev_idx : 22;
> +        /* special flags, e.g., whether insn is doing register stack spill/load */
> +        u32 flags : 10;
>  };

Nitpick: maybe use separate bit-fields for frameno and spi instead of flags?
Or add dedicated accessor functions? (See the sketch at the end of this mail
for the kind of helpers I mean.)

> 
> -#define MAX_CALL_FRAMES 8
>  /* Maximum number of register states that can exist at once */
>  #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
>  struct bpf_verifier_state {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2905ce2e8b34..fbb779583d52 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3479,14 +3479,20 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
>  }
>  
>  /* for any branch, call, exit record the history of jmps in the given state */
> -static int push_jmp_history(struct bpf_verifier_env *env,
> -                            struct bpf_verifier_state *cur)
> +static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
> +                             int insn_flags)
>  {
>          struct bpf_insn_hist_entry *p;
>          size_t alloc_size;
>  
> -        if (!is_jmp_point(env, env->insn_idx))
> +        /* combine instruction flags if we already recorded this instruction */
> +        if (cur->insn_hist_end > cur->insn_hist_start &&
> +            (p = &env->insn_hist[cur->insn_hist_end - 1]) &&
> +            p->idx == env->insn_idx &&
> +            p->prev_idx == env->prev_insn_idx) {
> +                p->flags |= insn_flags;

Nitpick: maybe add an assert to check that frameno/spi are not or'ed?

[...]

> +static struct bpf_insn_hist_entry *get_hist_insn_entry(struct bpf_verifier_env *env,
> +                                                       u32 hist_start, u32 hist_end, int insn_idx)

Nitpick: maybe rename 'hist_insn' to 'insn_hist', i.e. 'get_insn_hist_entry'?

[...]

> @@ -4713,9 +4711,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
>  
>                  /* Mark slots affected by this stack write. */
>                  for (i = 0; i < size; i++)
> -                        state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
> -                                type;
> +                        state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
> +                insn_flags = 0; /* not a register spill */
>          }
> +
> +        if (insn_flags)
> +                return push_insn_history(env, env->cur_state, insn_flags);

Maybe add a check that insn is BPF_ST or BPF_STX here? Only these cases
are supported by backtrack_insn(), while check_mem_access() is called
from multiple places.
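Something along these lines, for example (a completely untested sketch;
it relies on the insn_idx parameter that check_stack_write_fixed_off()
already receives, and how exactly to react to a violation is up to you):

        if (insn_flags) {
                u8 class = BPF_CLASS(env->prog->insnsi[insn_idx].code);

                /* backtrack_insn() can only reconstruct spills/fills done
                 * by BPF_ST/BPF_STX, so refuse to record history for stack
                 * writes coming from other callers of check_mem_access()
                 * (e.g. helper memory arguments)
                 */
                if (WARN_ON_ONCE(class != BPF_ST && class != BPF_STX))
                        return -EFAULT;
                return push_insn_history(env, env->cur_state, insn_flags);
        }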
>          return 0;
>  }
>  
> @@ -4908,6 +4909,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
>          int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
>          struct bpf_reg_state *reg;
>          u8 *stype, type;
> +        int insn_flags = INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | reg_state->frameno;
>  
>          stype = reg_state->stack[spi].slot_type;
>          reg = &reg_state->stack[spi].spilled_ptr;
> @@ -4953,12 +4955,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
>                          return -EACCES;
>                  }
>                  mark_reg_unknown(env, state->regs, dst_regno);
> +                insn_flags = 0; /* not restoring original register state */
>          }
>          state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
> -                return 0;
> -        }
> -
> -        if (dst_regno >= 0) {
> +        } else if (dst_regno >= 0) {
>                  /* restore register state from stack */
>                  copy_register_state(&state->regs[dst_regno], reg);
>                  /* mark reg as written since spilled pointer state likely
> @@ -4994,7 +4994,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
>                  mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
>                  if (dst_regno >= 0)
>                          mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
> +                insn_flags = 0; /* we are not restoring spilled register */
>          }
> +        if (insn_flags)
> +                return push_insn_history(env, env->cur_state, insn_flags);
>          return 0;
>  }
>  
> @@ -7125,7 +7128,6 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
>                                 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
>          if (err)
>                  return err;
> -
>          return 0;
>  }
>  
> @@ -17001,7 +17003,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
>                           * the precision needs to be propagated back in
>                           * the current state.
>                           */
> -                        err = err ? : push_jmp_history(env, cur);
> +                        if (is_jmp_point(env, env->insn_idx))
> +                                err = err ? : push_insn_history(env, cur, 0);
>                          err = err ? : propagate_precision(env, &sl->state);
>                          if (err)
>                                  return err;
> @@ -17265,7 +17268,7 @@ static int do_check(struct bpf_verifier_env *env)
>                  }
>  
>                  if (is_jmp_point(env, env->insn_idx)) {
> -                        err = push_jmp_history(env, state);
> +                        err = push_insn_history(env, state, 0);
>                          if (err)
>                                  return err;
>                  }
> diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
> index db6b3143338b..88c4207c6b4c 100644
> --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
> +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
> @@ -487,7 +487,24 @@ __success __log_level(2)
>   * so we won't be able to mark stack slot fp-8 as precise, and so will
>   * fallback to forcing all as precise
>   */
> -__msg("mark_precise: frame0: falling back to forcing all scalars precise")
> +__msg("10: (0f) r1 += r7")
> +__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
> +__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
> +__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
> +__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
> +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(off=0,ks=4,vs=16,imm=0) R10=fp0 fp-8_rw=P1")
> +__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
> +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
> +__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
> +__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)")
> +__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)")
> +__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2")
> +__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2")
> +__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6")
> +__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6")
> +__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8")
> +__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10")
> +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
>  __naked int subprog_spill_into_parent_stack_slot_precise(void)
>  {
>          asm volatile (
> @@ -522,14 +539,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void)
>          );
>  }
>  
> -__naked __noinline __used
> -static __u64 subprog_with_checkpoint(void)
> +SEC("?raw_tp")
> +__success __log_level(2)
> +__msg("17: (0f) r1 += r0")
> +__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1")
> +__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7")
> +__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4")
> +__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)")
> +__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0")
> +__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)")
> +__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0")
> +__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)")
> +__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0")
> +__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32")
> +__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10")
> +__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10") > +__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit") > +__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1") > +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15") > +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") > +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") > +__naked int stack_slot_aliases_precision(void) > { > asm volatile ( > - "r0 = 0;" > - /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ > - "goto +0;" > + "r6 = 1;" > + /* pass r6 through r1 into subprog to get it back as r0; > + * this whole chain will have to be marked as precise later > + */ > + "r1 = r6;" > + "call identity_subprog;" > + /* let's setup two registers that are aliased to r10 */ > + "r7 = r10;" > + "r7 += -8;" /* r7 = r10 - 8 */ > + "r8 = r10;" > + "r8 += -32;" /* r8 = r10 - 32 */ > + /* now spill subprog's return value (a r6 -> r1 -> r0 chain) > + * a few times through different stack pointer regs, making > + * sure to use r10, r7, and r8 both in LDX and STX insns, and > + * *importantly* also using a combination of const var_off and > + * insn->off to validate that we record final stack slot > + * correctly, instead of relying on just insn->off derivation, > + * which is only valid for r10-based stack offset > + */ > + "*(u64 *)(r10 - 16) = r0;" > + "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */ > + "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */ > + "r0 = *(u64 *)(r8 + 16);" > + "*(u64 *)(r7 - 8) = r0;" > + "r0 = *(u64 *)(r10 - 16);" > + /* get ready to use r0 as an index into array to force precision */ > + "r0 *= 4;" > + "r1 = %[vals];" > + /* here r0->r1->r6 chain is forced to be precise and has to be > + * propagated back to the beginning, including through the > + * subprog call and all the stack spills and loads > + */ > + "r1 += r0;" > + "r0 = *(u32 *)(r1 + 0);" > "exit;" > + : > + : __imm_ptr(vals) > + : __clobber_common, "r6" > ); > } > > diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c > index 0d84dd1f38b6..8a2ff81d8350 100644 > --- a/tools/testing/selftests/bpf/verifier/precise.c > +++ b/tools/testing/selftests/bpf/verifier/precise.c > @@ -140,10 +140,11 @@ > .result = REJECT, > }, > { > - "precise: ST insn causing spi > allocated_stack", > + "precise: ST zero to stack insn is supported", > .insns = { > BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), > BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), > + /* not a register spill, so we stop precision propagation for R4 here */ > BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), > BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), > BPF_MOV64_IMM(BPF_REG_0, -1), > @@ -157,11 +158,11 @@ > mark_precise: frame0: last_idx 4 first_idx 2\ > mark_precise: frame0: regs=r4 stack= before 4\ > mark_precise: frame0: regs=r4 stack= before 3\ > - mark_precise: frame0: regs= stack=-8 before 2\ > - mark_precise: frame0: falling back to forcing all scalars precise\ > - force_precise: frame0: forcing r0 to be precise\ > mark_precise: frame0: last_idx 5 first_idx 5\ > - mark_precise: frame0: parent state regs= stack=:", > + mark_precise: frame0: parent state regs=r0 stack=:\ > + mark_precise: frame0: last_idx 4 first_idx 2\ > + mark_precise: frame0: regs=r0 stack= before 4\ > + 5: R0=-1 R4=0", > .result = VERBOSE_ACCEPT, > .retval = -1, > }, > @@ -169,6 +170,8 @@ > "precise: STX insn causing spi > allocated_stack", > .insns = { > BPF_RAW_INSN(BPF_JMP | 
BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), > + /* make later reg spill more interesting by having somewhat known scalar */ > + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), > BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), > BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), > BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), > @@ -179,18 +182,21 @@ > }, > .prog_type = BPF_PROG_TYPE_XDP, > .flags = BPF_F_TEST_STATE_FREQ, > - .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ > + .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ > mark_precise: frame0: parent state regs=r4 stack=:\ > - mark_precise: frame0: last_idx 5 first_idx 3\ > - mark_precise: frame0: regs=r4 stack= before 5\ > - mark_precise: frame0: regs=r4 stack= before 4\ > - mark_precise: frame0: regs= stack=-8 before 3\ > - mark_precise: frame0: falling back to forcing all scalars precise\ > - force_precise: frame0: forcing r0 to be precise\ > - force_precise: frame0: forcing r0 to be precise\ > - force_precise: frame0: forcing r0 to be precise\ > - force_precise: frame0: forcing r0 to be precise\ > - mark_precise: frame0: last_idx 6 first_idx 6\ > + mark_precise: frame0: last_idx 6 first_idx 4\ > + mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ > + mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ > + mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ > + mark_precise: frame0: parent state regs=r0 stack=:\ > + mark_precise: frame0: last_idx 3 first_idx 3\ > + mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\ > + mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\ > + mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\ > + mark_precise: frame0: parent state regs=r0 stack=:\ > + mark_precise: frame0: last_idx 0 first_idx 0\ > + mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\ > + mark_precise: frame0: last_idx 7 first_idx 7\ > mark_precise: frame0: parent state regs= stack=:", > .result = VERBOSE_ACCEPT, > .retval = -1,
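
Coming back to the accessor-function nitpick from the top of the mail,
helpers along these lines are what I had in mind (untested, names are
arbitrary; they just mirror the bit layout defined by the INSN_F_* enum):

static u32 insn_hist_frameno(const struct bpf_insn_hist_entry *e)
{
        /* frame number lives in the lower 3 bits of flags */
        return e->flags & INSN_F_FRAMENO_MASK;
}

static u32 insn_hist_spi(const struct bpf_insn_hist_entry *e)
{
        /* stack slot index lives in the next 6 bits */
        return (e->flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
}

/* to be used when recording a stack access, replacing the open-coded
 * INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno
 * expressions in check_stack_{read,write}_fixed_off()
 */
static int insn_stack_access_flags(int frameno, int spi)
{
        return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
}

That way the encoding is defined in exactly one place.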