On Tue, Oct 29, 2024 at 12:39 PM Eduard Zingerman <eddyz87@xxxxxxxxx> wrote:
>
> Hou Tao reported an issue with bpf_fastcall patterns allowing extra
> stack space above the MAX_BPF_STACK limit. This extra stack allowance
> is not integrated properly with the following verifier parts:
> - backtracking logic still assumes that stack can't exceed
>   MAX_BPF_STACK;
> - bpf_verifier_env->scratched_stack_slots assumes only 64 slots are
>   available.
>
> Here is an example of the issue with precision tracking
> (note that stack slot -8 is tracked as precise instead of -520):
>
>   0: (b7) r1 = 42                        ; R1_w=42
>   1: (b7) r2 = 42                        ; R2_w=42
>   2: (7b) *(u64 *)(r10 -512) = r1        ; R1_w=42 R10=fp0 fp-512_w=42
>   3: (7b) *(u64 *)(r10 -520) = r2        ; R2_w=42 R10=fp0 fp-520_w=42
>   4: (85) call bpf_get_smp_processor_id#8 ; R0_w=scalar(...)
>   5: (79) r2 = *(u64 *)(r10 -520)        ; R2_w=42 R10=fp0 fp-520_w=42
>   6: (79) r1 = *(u64 *)(r10 -512)        ; R1_w=42 R10=fp0 fp-512_w=42
>   7: (bf) r3 = r10                       ; R3_w=fp0 R10=fp0
>   8: (0f) r3 += r2
>   mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx -1
>   mark_precise: frame0: regs=r2 stack= before 7: (bf) r3 = r10
>   mark_precise: frame0: regs=r2 stack= before 6: (79) r1 = *(u64 *)(r10 -512)
>   mark_precise: frame0: regs=r2 stack= before 5: (79) r2 = *(u64 *)(r10 -520)
>   mark_precise: frame0: regs= stack=-8 before 4: (85) call bpf_get_smp_processor_id#8
>   mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -520) = r2
>   mark_precise: frame0: regs=r2 stack= before 2: (7b) *(u64 *)(r10 -512) = r1
>   mark_precise: frame0: regs=r2 stack= before 1: (b7) r2 = 42
>   9: R2_w=42 R3_w=fp42
>   9: (95) exit
>
> This patch disables the additional allowance for the moment.
> Also, two test cases are removed:
> - bpf_fastcall_max_stack_ok:
>   it fails w/o the additional stack allowance;
> - bpf_fastcall_max_stack_fail:
>   this test is no longer necessary: stack size now follows the
>   regular rules, and pattern invalidation is checked by other
>   test cases.
>
> Reported-by: Hou Tao <houtao@xxxxxxxxxxxxxxx>
> Closes: https://lore.kernel.org/bpf/20241023022752.172005-1-houtao@xxxxxxxxxxxxxxx/
> Fixes: 5b5f51bff1b6 ("bpf: no_caller_saved_registers attribute for helper calls")
> Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx>
> ---
>  kernel/bpf/verifier.c                   | 14 +----
>  .../bpf/progs/verifier_bpf_fastcall.c   | 55 -------------------
>  2 files changed, 2 insertions(+), 67 deletions(-)
>

LGTM

Acked-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
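As a side note on why the log above ends up with "stack=-8": backtracking keeps
precision marks for stack slots as bits of a 64-bit mask, so only slots 0..63
(fp-8 through fp-512) are representable. A minimal user-space sketch of the
aliasing (simplified arithmetic and hypothetical variable names, not the actual
verifier code, which keeps the mask in its backtracking state):

#include <stdint.h>
#include <stdio.h>

#define BPF_REG_SIZE 8

int main(void)
{
	uint64_t stack_mask = 0;              /* one bit per tracked stack slot */
	int off = -520;                       /* the spill offset from the log */
	int slot = (-off - 1) / BPF_REG_SIZE; /* -520 -> slot 64, -8 -> slot 0 */

	/* Only slots 0..63 (fp-8 .. fp-512) fit in a 64-bit mask; a shift by
	 * 64 is undefined in C and wraps to 0 on x86, so the precision mark
	 * lands on slot 0 (fp-8) -- matching the "stack=-8" lines above.
	 * The wrap is made explicit here with "& 63" to keep the sketch
	 * well-defined.
	 */
	stack_mask |= UINT64_C(1) << (slot & 63);

	printf("off %d -> slot %d, marked slot %d (fp-%d)\n",
	       off, slot, slot & 63, ((slot & 63) + 1) * BPF_REG_SIZE);
	return 0;
}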
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 587a6c76e564..a494396bef2a 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6804,20 +6804,10 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
>                                           struct bpf_func_state *state,
>                                           enum bpf_access_type t)
>  {
> -	struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx];
> -	int min_valid_off, max_bpf_stack;
> -
> -	/* If accessing instruction is a spill/fill from bpf_fastcall pattern,
> -	 * add room for all caller saved registers below MAX_BPF_STACK.
> -	 * In case if bpf_fastcall rewrite won't happen maximal stack depth
> -	 * would be checked by check_max_stack_depth_subprog().
> -	 */
> -	max_bpf_stack = MAX_BPF_STACK;
> -	if (aux->fastcall_pattern)
> -		max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
> +	int min_valid_off;
>
>  	if (t == BPF_WRITE || env->allow_uninit_stack)
> -		min_valid_off = -max_bpf_stack;
> +		min_valid_off = -MAX_BPF_STACK;
>  	else
>  		min_valid_off = -state->allocated_stack;
>
> diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
> index 9da97d2efcd9..5094c288cfd7 100644
> --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
> +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
> @@ -790,61 +790,6 @@ __naked static void cumulative_stack_depth_subprog(void)
>  	:: __imm(bpf_get_smp_processor_id) : __clobber_all);
>  }
>
> -SEC("raw_tp")
> -__arch_x86_64
> -__log_level(4)
> -__msg("stack depth 512")
> -__xlated("0: r1 = 42")
> -__xlated("1: *(u64 *)(r10 -512) = r1")
> -__xlated("2: w0 = ")
> -__xlated("3: r0 = &(void __percpu *)(r0)")
> -__xlated("4: r0 = *(u32 *)(r0 +0)")
> -__xlated("5: exit")
> -__success
> -__naked int bpf_fastcall_max_stack_ok(void)
> -{
> -	asm volatile(
> -	"r1 = 42;"
> -	"*(u64 *)(r10 - %[max_bpf_stack]) = r1;"
> -	"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
> -	"call %[bpf_get_smp_processor_id];"
> -	"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
> -	"exit;"
> -	:
> -	: __imm_const(max_bpf_stack, MAX_BPF_STACK),
> -	  __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8),
> -	  __imm(bpf_get_smp_processor_id)
> -	: __clobber_all
> -	);
> -}
> -
> -SEC("raw_tp")
> -__arch_x86_64
> -__log_level(4)
> -__msg("stack depth 520")
> -__failure
> -__naked int bpf_fastcall_max_stack_fail(void)
> -{
> -	asm volatile(
> -	"r1 = 42;"
> -	"*(u64 *)(r10 - %[max_bpf_stack]) = r1;"
> -	"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
> -	"call %[bpf_get_smp_processor_id];"
> -	"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
> -	/* call to prandom blocks bpf_fastcall rewrite */
> -	"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
> -	"call %[bpf_get_prandom_u32];"
> -	"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
> -	"exit;"
> -	:
> -	: __imm_const(max_bpf_stack, MAX_BPF_STACK),
> -	  __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8),
> -	  __imm(bpf_get_smp_processor_id),
> -	  __imm(bpf_get_prandom_u32)
> -	: __clobber_all
> -	);
> -}
> -
>  SEC("cgroup/getsockname_unix")
>  __xlated("0: r2 = 1")
>  /* bpf_cast_to_kern_ctx is replaced by a single assignment */
> --
> 2.47.0
>
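After this patch the write-side bound is simply -MAX_BPF_STACK again. A minimal
user-space sketch of the resulting check (simplified, with hypothetical helper
names; the real logic is in check_stack_slot_within_bounds() as modified above),
handy for sanity-checking the offsets from the example log:

#include <stdbool.h>
#include <stdio.h>

#define MAX_BPF_STACK 512	/* maximum BPF stack size, as in the kernel */

/* Simplified model of the write-side bound after this patch: the fastcall
 * allowance (CALLER_SAVED_REGS * BPF_REG_SIZE extra bytes below
 * MAX_BPF_STACK) is gone, so a write at offset off from fp is in bounds
 * only if -MAX_BPF_STACK <= off < 0.
 */
static bool stack_write_within_bounds(int off)
{
	return off >= -MAX_BPF_STACK && off < 0;
}

int main(void)
{
	/* fp-512 is the deepest legal slot; fp-520 was previously accepted
	 * only through the fastcall allowance and is now rejected.
	 */
	printf("fp-512: %s\n", stack_write_within_bounds(-512) ? "ok" : "rejected");
	printf("fp-520: %s\n", stack_write_within_bounds(-520) ? "ok" : "rejected");
	return 0;
}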