On Mon, 2023-10-30 at 22:03 -0700, Andrii Nakryiko wrote:

All makes sense, a few nitpicks below.

Acked-by: Eduard Zingerman <eddyz87@xxxxxxxxx>

[...]

> +/* instruction history flags, used in bpf_insn_hist_entry.flags field */
> +enum {
> +        /* instruction references stack slot through PTR_TO_STACK register;
> +         * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8)
> +         * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512,
> +         * 8 bytes per slot, so slot index (spi) is [0, 63])
> +         */
> +        INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */
> +
> +        INSN_F_SPI_MASK = 0x3f, /* 6 bits */
> +        INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
> +
> +        INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
> +};
> +
> +static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
> +static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
> +
>  struct bpf_insn_hist_entry {
> -        u32 prev_idx;
>          u32 idx;
> +        /* insn idx can't be bigger than 1 million */
> +        u32 prev_idx : 22;
> +        /* special flags, e.g., whether insn is doing register stack spill/load */
> +        u32 flags : 10;
>  };

Nitpick: maybe use separate bit-fields for frameno and spi instead of flags?
Or add dedicated accessor functions? (See the sketch at the end of this mail
for the kind of helpers I mean.)

> 
> -#define MAX_CALL_FRAMES 8
>  /* Maximum number of register states that can exist at once */
>  #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
>  struct bpf_verifier_state {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2905ce2e8b34..fbb779583d52 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3479,14 +3479,20 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
>  }
>  
>  /* for any branch, call, exit record the history of jmps in the given state */
> -static int push_jmp_history(struct bpf_verifier_env *env,
> -                            struct bpf_verifier_state *cur)
> +static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
> +                             int insn_flags)
>  {
>          struct bpf_insn_hist_entry *p;
>          size_t alloc_size;
>  
> -        if (!is_jmp_point(env, env->insn_idx))
> +        /* combine instruction flags if we already recorded this instruction */
> +        if (cur->insn_hist_end > cur->insn_hist_start &&
> +            (p = &env->insn_hist[cur->insn_hist_end - 1]) &&
> +            p->idx == env->insn_idx &&
> +            p->prev_idx == env->prev_insn_idx) {
> +                p->flags |= insn_flags;

Nitpick: maybe add an assert to check that frameno/spi are not or'ed?

[...]

> +static struct bpf_insn_hist_entry *get_hist_insn_entry(struct bpf_verifier_env *env,
> +                                                       u32 hist_start, u32 hist_end, int insn_idx)

Nitpick: maybe rename 'hist_insn' to 'insn_hist', i.e. 'get_insn_hist_entry'?

[...]

> @@ -4713,9 +4711,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
>  
>                  /* Mark slots affected by this stack write. */
>                  for (i = 0; i < size; i++)
> -                        state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
> -                                type;
> +                        state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
> +                insn_flags = 0; /* not a register spill */
>          }
> +
> +        if (insn_flags)
> +                return push_insn_history(env, env->cur_state, insn_flags);

Maybe add a check that insn is BPF_ST or BPF_STX here? Only these cases
are supported by backtrack_insn(), while check_mem_access() is called
from multiple places.
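Something along these lines, for example (a completely untested sketch;
it relies on the insn_idx parameter that check_stack_write_fixed_off()
already receives, and how exactly to react to a violation is up to you):

        if (insn_flags) {
                u8 class = BPF_CLASS(env->prog->insnsi[insn_idx].code);

                /* backtrack_insn() can only reconstruct spills/fills done
                 * by BPF_ST/BPF_STX, so refuse to record history for stack
                 * writes coming from other callers of check_mem_access()
                 * (e.g. helper memory arguments)
                 */
                if (WARN_ON_ONCE(class != BPF_ST && class != BPF_STX))
                        return -EFAULT;
                return push_insn_history(env, env->cur_state, insn_flags);
        }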
>          return 0;
>  }
>  
> @@ -4908,6 +4909,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
>          int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
>          struct bpf_reg_state *reg;
>          u8 *stype, type;
> +        int insn_flags = INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | reg_state->frameno;
>  
>          stype = reg_state->stack[spi].slot_type;
>          reg = &reg_state->stack[spi].spilled_ptr;
> @@ -4953,12 +4955,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
>                          return -EACCES;
>                  }
>                  mark_reg_unknown(env, state->regs, dst_regno);
> +                insn_flags = 0; /* not restoring original register state */
>          }
>          state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
> -                return 0;
> -        }
> -
> -        if (dst_regno >= 0) {
> +        } else if (dst_regno >= 0) {
>                  /* restore register state from stack */
>                  copy_register_state(&state->regs[dst_regno], reg);
>                  /* mark reg as written since spilled pointer state likely
> @@ -4994,7 +4994,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
>                  mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
>                  if (dst_regno >= 0)
>                          mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
> +                insn_flags = 0; /* we are not restoring spilled register */
>          }
> +        if (insn_flags)
> +                return push_insn_history(env, env->cur_state, insn_flags);
>          return 0;
>  }
>  
> @@ -7125,7 +7128,6 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
>                                 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
>          if (err)
>                  return err;
> -
>          return 0;
>  }
>  
> @@ -17001,7 +17003,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
>                           * the precision needs to be propagated back in
>                           * the current state.
>                           */
> -                        err = err ? : push_jmp_history(env, cur);
> +                        if (is_jmp_point(env, env->insn_idx))
> +                                err = err ? : push_insn_history(env, cur, 0);
>                          err = err ? : propagate_precision(env, &sl->state);
>                          if (err)
>                                  return err;
> @@ -17265,7 +17268,7 @@ static int do_check(struct bpf_verifier_env *env)
>                  }
>  
>                  if (is_jmp_point(env, env->insn_idx)) {
> -                        err = push_jmp_history(env, state);
> +                        err = push_insn_history(env, state, 0);
>                          if (err)
>                                  return err;
>                  }
> diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
> index db6b3143338b..88c4207c6b4c 100644
> --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
> +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
> @@ -487,7 +487,24 @@ __success __log_level(2)
>   * so we won't be able to mark stack slot fp-8 as precise, and so will
>   * fallback to forcing all as precise
>   */
> -__msg("mark_precise: frame0: falling back to forcing all scalars precise")
> +__msg("10: (0f) r1 += r7")
> +__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
> +__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
> +__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
> +__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
> +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(off=0,ks=4,vs=16,imm=0) R10=fp0 fp-8_rw=P1")
> +__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
> +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
> +__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
> +__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)")
> +__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)")
> +__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2")
> +__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2")
> +__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6")
> +__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6")
> +__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8")
> +__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10")
> +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
>  __naked int subprog_spill_into_parent_stack_slot_precise(void)
>  {
>          asm volatile (
> @@ -522,14 +539,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void)
>          );
>  }
>  
> -__naked __noinline __used
> -static __u64 subprog_with_checkpoint(void)
> +SEC("?raw_tp")
> +__success __log_level(2)
> +__msg("17: (0f) r1 += r0")
> +__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1")
> +__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7")
> +__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4")
> +__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)")
> +__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0")
> +__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)")
> +__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0")
> +__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)")
> +__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0")
> +__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32")
> +__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10")
> +__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10") > +__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit") > +__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1") > +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15") > +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") > +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") > +__naked int stack_slot_aliases_precision(void) > { > asm volatile ( > - "r0 = 0;" > - /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ > - "goto +0;" > + "r6 = 1;" > + /* pass r6 through r1 into subprog to get it back as r0; > + * this whole chain will have to be marked as precise later > + */ > + "r1 = r6;" > + "call identity_subprog;" > + /* let's setup two registers that are aliased to r10 */ > + "r7 = r10;" > + "r7 += -8;" /* r7 = r10 - 8 */ > + "r8 = r10;" > + "r8 += -32;" /* r8 = r10 - 32 */ > + /* now spill subprog's return value (a r6 -> r1 -> r0 chain) > + * a few times through different stack pointer regs, making > + * sure to use r10, r7, and r8 both in LDX and STX insns, and > + * *importantly* also using a combination of const var_off and > + * insn->off to validate that we record final stack slot > + * correctly, instead of relying on just insn->off derivation, > + * which is only valid for r10-based stack offset > + */ > + "*(u64 *)(r10 - 16) = r0;" > + "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */ > + "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */ > + "r0 = *(u64 *)(r8 + 16);" > + "*(u64 *)(r7 - 8) = r0;" > + "r0 = *(u64 *)(r10 - 16);" > + /* get ready to use r0 as an index into array to force precision */ > + "r0 *= 4;" > + "r1 = %[vals];" > + /* here r0->r1->r6 chain is forced to be precise and has to be > + * propagated back to the beginning, including through the > + * subprog call and all the stack spills and loads > + */ > + "r1 += r0;" > + "r0 = *(u32 *)(r1 + 0);" > "exit;" > + : > + : __imm_ptr(vals) > + : __clobber_common, "r6" > ); > } > > diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c > index 0d84dd1f38b6..8a2ff81d8350 100644 > --- a/tools/testing/selftests/bpf/verifier/precise.c > +++ b/tools/testing/selftests/bpf/verifier/precise.c > @@ -140,10 +140,11 @@ > .result = REJECT, > }, > { > - "precise: ST insn causing spi > allocated_stack", > + "precise: ST zero to stack insn is supported", > .insns = { > BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), > BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), > + /* not a register spill, so we stop precision propagation for R4 here */ > BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), > BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), > BPF_MOV64_IMM(BPF_REG_0, -1), > @@ -157,11 +158,11 @@ > mark_precise: frame0: last_idx 4 first_idx 2\ > mark_precise: frame0: regs=r4 stack= before 4\ > mark_precise: frame0: regs=r4 stack= before 3\ > - mark_precise: frame0: regs= stack=-8 before 2\ > - mark_precise: frame0: falling back to forcing all scalars precise\ > - force_precise: frame0: forcing r0 to be precise\ > mark_precise: frame0: last_idx 5 first_idx 5\ > - mark_precise: frame0: parent state regs= stack=:", > + mark_precise: frame0: parent state regs=r0 stack=:\ > + mark_precise: frame0: last_idx 4 first_idx 2\ > + mark_precise: frame0: regs=r0 stack= before 4\ > + 5: R0=-1 R4=0", > .result = VERBOSE_ACCEPT, > .retval = -1, > }, > @@ -169,6 +170,8 @@ > "precise: STX insn causing spi > allocated_stack", > .insns = { > BPF_RAW_INSN(BPF_JMP | 
BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), > + /* make later reg spill more interesting by having somewhat known scalar */ > + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), > BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), > BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), > BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), > @@ -179,18 +182,21 @@ > }, > .prog_type = BPF_PROG_TYPE_XDP, > .flags = BPF_F_TEST_STATE_FREQ, > - .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ > + .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ > mark_precise: frame0: parent state regs=r4 stack=:\ > - mark_precise: frame0: last_idx 5 first_idx 3\ > - mark_precise: frame0: regs=r4 stack= before 5\ > - mark_precise: frame0: regs=r4 stack= before 4\ > - mark_precise: frame0: regs= stack=-8 before 3\ > - mark_precise: frame0: falling back to forcing all scalars precise\ > - force_precise: frame0: forcing r0 to be precise\ > - force_precise: frame0: forcing r0 to be precise\ > - force_precise: frame0: forcing r0 to be precise\ > - force_precise: frame0: forcing r0 to be precise\ > - mark_precise: frame0: last_idx 6 first_idx 6\ > + mark_precise: frame0: last_idx 6 first_idx 4\ > + mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ > + mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ > + mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ > + mark_precise: frame0: parent state regs=r0 stack=:\ > + mark_precise: frame0: last_idx 3 first_idx 3\ > + mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\ > + mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\ > + mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\ > + mark_precise: frame0: parent state regs=r0 stack=:\ > + mark_precise: frame0: last_idx 0 first_idx 0\ > + mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\ > + mark_precise: frame0: last_idx 7 first_idx 7\ > mark_precise: frame0: parent state regs= stack=:", > .result = VERBOSE_ACCEPT, > .retval = -1,
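
Coming back to the accessor-function nitpick from the top of the mail,
helpers along these lines are what I had in mind (untested, names are
arbitrary; they just mirror the bit layout defined by the INSN_F_* enum):

static u32 insn_hist_frameno(const struct bpf_insn_hist_entry *e)
{
        /* frame number lives in the lower 3 bits of flags */
        return e->flags & INSN_F_FRAMENO_MASK;
}

static u32 insn_hist_spi(const struct bpf_insn_hist_entry *e)
{
        /* stack slot index lives in the next 6 bits */
        return (e->flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
}

/* to be used when recording a stack access, replacing the open-coded
 * INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno
 * expressions in check_stack_{read,write}_fixed_off()
 */
static int insn_stack_access_flags(int frameno, int spi)
{
        return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
}

That way the encoding is defined in exactly one place.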