On Mon, Jan 8, 2024 at 12:53 PM Maxim Mikityanskiy <maxtram95@xxxxxxxxx> wrote:
>
> From: Eduard Zingerman <eddyz87@xxxxxxxxx>
>
> Changes for scalar ID tracking of spilled unbound scalars lead to
> certain verification performance regression. This commit mitigates the
> regression by exploiting the following properties maintained by
> check_stack_read_fixed_off():
> - a mix of STACK_MISC, STACK_ZERO and STACK_INVALID marks is read as
>   unbounded scalar register;
> - spi with all slots marked STACK_ZERO is read as scalar register with
>   value zero.
>
> This commit modifies stacksafe() to consider situations above
> equivalent.
>
> Veristat results after this patch show significant gains:
>
> $ ./veristat -e file,prog,states -f '!states_pct<10' -f '!states_b<10' -C not-opt after
> File              Program   States (A)  States (B)  States    (DIFF)
> ----------------  --------  ----------  ----------  ----------------
> pyperf180.bpf.o   on_event       10456        8422    -2034 (-19.45%)
> pyperf600.bpf.o   on_event       37319       22519  -14800 (-39.66%)
> strobemeta.bpf.o  on_event       13435        4703    -8732 (-64.99%)
>
> Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx>
> ---
>  kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 83 insertions(+)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index aeb3e198a5ea..cb82f8d4226f 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1170,6 +1170,12 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
>          *stype = STACK_MISC;
>  }
>
> +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
> +{
> +        return stack->slot_type[0] == STACK_SPILL &&
> +               stack->spilled_ptr.type == SCALAR_VALUE;
> +}
> +
>  static void scrub_spilled_slot(u8 *stype)
>  {
>          if (*stype != STACK_INVALID)
> @@ -16459,11 +16465,45 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
>          }
>  }
>
> +static bool is_stack_zero64(struct bpf_stack_state *stack)
> +{
> +        u32 i;
> +
> +        for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +                if (stack->slot_type[i] != STACK_ZERO)
> +                        return false;
> +        return true;
> +}
> +
> +static bool is_stack_unbound_slot64(struct bpf_verifier_env *env,
> +                                    struct bpf_stack_state *stack)
> +{
> +        u32 i;
> +
> +        for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +                if (stack->slot_type[i] != STACK_ZERO &&
> +                    stack->slot_type[i] != STACK_MISC &&
> +                    (!env->allow_uninit_stack || stack->slot_type[i] != STACK_INVALID))
> +                        return false;
> +        return true;
> +}
> +
> +static bool is_spilled_unbound_scalar_reg64(struct bpf_stack_state *stack)
> +{
> +        return is_spilled_scalar_reg64(stack) && __is_scalar_unbounded(&stack->spilled_ptr);
> +}
> +
>  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>                        struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
>  {
> +        struct bpf_reg_state unbound_reg = {};
> +        struct bpf_reg_state zero_reg = {};
>          int i, spi;
>
> +        __mark_reg_unknown(env, &unbound_reg);
> +        __mark_reg_const_zero(env, &zero_reg);
> +        zero_reg.precise = true;

these are immutable, right? Would it make sense to set them up just
once as static variables instead of initializing on each check?
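Something like this, e.g. (completely untested sketch; it assumes the
end state of these two dummy regs really doesn't depend on env, and it
ignores making the one-time init safe if two programs can be verified
in parallel):

static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
                      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
{
        /* dummy compare targets, set up only once; the guard variable
         * name is made up just for this sketch
         */
        static struct bpf_reg_state unbound_reg;
        static struct bpf_reg_state zero_reg;
        static bool dummy_regs_init;
        int i, spi;

        if (!dummy_regs_init) {
                __mark_reg_unknown(env, &unbound_reg);
                __mark_reg_const_zero(env, &zero_reg);
                zero_reg.precise = true;
                dummy_regs_init = true;
        }
        ...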
> +
>          /* walk slots of the explored stack and ignore any additional
>           * slots in the current stack, since explored(safe) state
>           * didn't use them
> @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>                          continue;
>                  }
>

we didn't check that cur->stack[spi] is ok to access yet, it's done a
bit later with `if (i >= cur->allocated_stack)`, if I'm not mistaken.
So these checks would need to be moved a bit lower, probably.

> +                /* load of stack value with all MISC and ZERO slots produces unbounded
> +                 * scalar value, call regsafe to ensure scalar ids are compared.
> +                 */
> +                if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> +                    is_stack_unbound_slot64(env, &cur->stack[spi])) {
> +                        i += BPF_REG_SIZE - 1;
> +                        if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> +                                     idmap, exact))
> +                                return false;
> +                        continue;
> +                }
> +
> +                if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> +                    is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> +                        i += BPF_REG_SIZE - 1;
> +                        if (!regsafe(env, &unbound_reg, &cur->stack[spi].spilled_ptr,
> +                                     idmap, exact))
> +                                return false;
> +                        continue;
> +                }

How about combining the two directions into something like:

scalar_old = scalar_cur = NULL;
if (is_spilled_unbound64(&old->..))
        scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
                     &old->stack[spi].spilled_ptr : &unbound_reg;
if (is_spilled_unbound64(&cur->..))
        scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
                     &cur->stack[spi].spilled_ptr : &unbound_reg;
if (scalar_old && scalar_cur) {
        if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
                return false;
        i += BPF_REG_SIZE - 1;
        continue;
}

where is_spilled_unbound64() would be basically `return
is_spilled_unbound_scalar_reg64(&old->..) ||
is_stack_unbound_slot64(&old->...)`; (a slightly fuller sketch at the
bottom of this mail).

Similarly for the zero case? Though I'm wondering if the zero case
should be checked first, as it's actually a subset of
is_spilled_unbound64() when it comes to STACK_ZERO/STACK_MISC mixes, no?

> +
> +                /* load of stack value with all ZERO slots produces scalar value 0,
> +                 * call regsafe to ensure scalar ids are compared and precision
> +                 * flags are taken into account.
> +                 */
> +                if (is_spilled_scalar_reg64(&old->stack[spi]) &&
> +                    is_stack_zero64(&cur->stack[spi])) {
> +                        if (!regsafe(env, &old->stack[spi].spilled_ptr, &zero_reg,
> +                                     idmap, exact))
> +                                return false;
> +                        i += BPF_REG_SIZE - 1;
> +                        continue;
> +                }
> +
> +                if (is_stack_zero64(&old->stack[spi]) &&
> +                    is_spilled_scalar_reg64(&cur->stack[spi])) {
> +                        if (!regsafe(env, &zero_reg, &cur->stack[spi].spilled_ptr,
> +                                     idmap, exact))
> +                                return false;
> +                        i += BPF_REG_SIZE - 1;
> +                        continue;
> +                }
> +
>                  if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
>                          continue;
>
> --
> 2.43.0
>
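To spell out the shape I have in mind a bit more (completely untested,
and is_spilled_unbound64() is just a name invented for this sketch):

static bool is_spilled_unbound64(struct bpf_verifier_env *env,
                                 struct bpf_stack_state *stack)
{
        /* either a spilled unbound 64-bit scalar, or a slot filled with
         * a mix of STACK_MISC/STACK_ZERO (and STACK_INVALID, if allowed)
         */
        return is_spilled_unbound_scalar_reg64(stack) ||
               is_stack_unbound_slot64(env, stack);
}

and then in the stacksafe() loop:

        struct bpf_reg_state *scalar_old, *scalar_cur;
        ...
        scalar_old = scalar_cur = NULL;
        if (is_spilled_unbound64(env, &old->stack[spi]))
                scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
                             &old->stack[spi].spilled_ptr : &unbound_reg;
        if (is_spilled_unbound64(env, &cur->stack[spi]))
                scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
                             &cur->stack[spi].spilled_ptr : &unbound_reg;
        if (scalar_old && scalar_cur) {
                if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
                        return false;
                i += BPF_REG_SIZE - 1;
                continue;
        }

The zero case could get the same treatment and, per the above, would
probably have to be checked before this block, since an all-STACK_ZERO
slot also satisfies is_stack_unbound_slot64().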