On Mon, Jan 8, 2024 at 12:53 PM Maxim Mikityanskiy <maxtram95@xxxxxxxxx> wrote:
>
> From: Eduard Zingerman <eddyz87@xxxxxxxxx>
>
> Changes for scalar ID tracking of spilled unbound scalars lead to
> certain verification performance regression. This commit mitigates the
> regression by exploiting the following properties maintained by
> check_stack_read_fixed_off():
> - a mix of STACK_MISC, STACK_ZERO and STACK_INVALID marks is read as
>   unbounded scalar register;
> - spi with all slots marked STACK_ZERO is read as scalar register with
>   value zero.
>
> This commit modifies stacksafe() to consider situations above
> equivalent.
>
> Veristat results after this patch show significant gains:
>
> $ ./veristat -e file,prog,states -f '!states_pct<10' -f '!states_b<10' -C not-opt after
> File              Program   States (A)  States (B)  States    (DIFF)
> ----------------  --------  ----------  ----------  ----------------
> pyperf180.bpf.o   on_event       10456        8422    -2034 (-19.45%)
> pyperf600.bpf.o   on_event       37319       22519  -14800 (-39.66%)
> strobemeta.bpf.o  on_event       13435        4703    -8732 (-64.99%)
>
> Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx>
> ---
>  kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 83 insertions(+)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index aeb3e198a5ea..cb82f8d4226f 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1170,6 +1170,12 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
>          *stype = STACK_MISC;
>  }
>
> +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
> +{
> +        return stack->slot_type[0] == STACK_SPILL &&
> +               stack->spilled_ptr.type == SCALAR_VALUE;
> +}
> +
>  static void scrub_spilled_slot(u8 *stype)
>  {
>          if (*stype != STACK_INVALID)
> @@ -16459,11 +16465,45 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
>          }
>  }
>
> +static bool is_stack_zero64(struct bpf_stack_state *stack)
> +{
> +        u32 i;
> +
> +        for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +                if (stack->slot_type[i] != STACK_ZERO)
> +                        return false;
> +        return true;
> +}
> +
> +static bool is_stack_unbound_slot64(struct bpf_verifier_env *env,
> +                                    struct bpf_stack_state *stack)
> +{
> +        u32 i;
> +
> +        for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +                if (stack->slot_type[i] != STACK_ZERO &&
> +                    stack->slot_type[i] != STACK_MISC &&
> +                    (!env->allow_uninit_stack || stack->slot_type[i] != STACK_INVALID))
> +                        return false;
> +        return true;
> +}
> +
> +static bool is_spilled_unbound_scalar_reg64(struct bpf_stack_state *stack)
> +{
> +        return is_spilled_scalar_reg64(stack) && __is_scalar_unbounded(&stack->spilled_ptr);
> +}
> +
>  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>                        struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
>  {
> +        struct bpf_reg_state unbound_reg = {};
> +        struct bpf_reg_state zero_reg = {};
>          int i, spi;
>
> +        __mark_reg_unknown(env, &unbound_reg);
> +        __mark_reg_const_zero(env, &zero_reg);
> +        zero_reg.precise = true;

these are immutable, right? Would it make sense to set them up just
once as static variables instead of initializing on each check?
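Something like this, e.g. (completely untested sketch; it assumes the
end state of these two dummy regs really doesn't depend on env, and it
ignores making the one-time init safe if two programs can be verified
in parallel):

static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
                      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
{
        /* dummy compare targets, set up only once; the guard variable
         * name is made up just for this sketch
         */
        static struct bpf_reg_state unbound_reg;
        static struct bpf_reg_state zero_reg;
        static bool dummy_regs_init;
        int i, spi;

        if (!dummy_regs_init) {
                __mark_reg_unknown(env, &unbound_reg);
                __mark_reg_const_zero(env, &zero_reg);
                zero_reg.precise = true;
                dummy_regs_init = true;
        }
        ...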
> +
>          /* walk slots of the explored stack and ignore any additional
>           * slots in the current stack, since explored(safe) state
>           * didn't use them
> @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>                          continue;
>                  }
>

we didn't check that cur->stack[spi] is ok to access yet, it's done a
bit later with `if (i >= cur->allocated_stack)`, if I'm not mistaken.
So these checks would need to be moved a bit lower, probably.

> +                /* load of stack value with all MISC and ZERO slots produces unbounded
> +                 * scalar value, call regsafe to ensure scalar ids are compared.
> +                 */
> +                if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> +                    is_stack_unbound_slot64(env, &cur->stack[spi])) {
> +                        i += BPF_REG_SIZE - 1;
> +                        if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> +                                     idmap, exact))
> +                                return false;
> +                        continue;
> +                }
> +
> +                if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> +                    is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> +                        i += BPF_REG_SIZE - 1;
> +                        if (!regsafe(env, &unbound_reg, &cur->stack[spi].spilled_ptr,
> +                                     idmap, exact))
> +                                return false;
> +                        continue;
> +                }

How about combining the two directions into something like:

scalar_old = scalar_cur = NULL;
if (is_spilled_unbound64(&old->..))
        scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
                     &old->stack[spi].spilled_ptr : &unbound_reg;
if (is_spilled_unbound64(&cur->..))
        scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
                     &cur->stack[spi].spilled_ptr : &unbound_reg;
if (scalar_old && scalar_cur) {
        if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
                return false;
        i += BPF_REG_SIZE - 1;
        continue;
}

where is_spilled_unbound64() would be basically `return
is_spilled_unbound_scalar_reg64(&old->..) ||
is_stack_unbound_slot64(&old->...)`; (a slightly fuller sketch at the
bottom of this mail).

Similarly for the zero case? Though I'm wondering if the zero case
should be checked first, as it's actually a subset of
is_spilled_unbound64() when it comes to STACK_ZERO/STACK_MISC mixes, no?

> +
> +                /* load of stack value with all ZERO slots produces scalar value 0,
> +                 * call regsafe to ensure scalar ids are compared and precision
> +                 * flags are taken into account.
> +                 */
> +                if (is_spilled_scalar_reg64(&old->stack[spi]) &&
> +                    is_stack_zero64(&cur->stack[spi])) {
> +                        if (!regsafe(env, &old->stack[spi].spilled_ptr, &zero_reg,
> +                                     idmap, exact))
> +                                return false;
> +                        i += BPF_REG_SIZE - 1;
> +                        continue;
> +                }
> +
> +                if (is_stack_zero64(&old->stack[spi]) &&
> +                    is_spilled_scalar_reg64(&cur->stack[spi])) {
> +                        if (!regsafe(env, &zero_reg, &cur->stack[spi].spilled_ptr,
> +                                     idmap, exact))
> +                                return false;
> +                        i += BPF_REG_SIZE - 1;
> +                        continue;
> +                }
> +
>                  if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
>                          continue;
>
> --
> 2.43.0
>
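To spell out the shape I have in mind a bit more (completely untested,
and is_spilled_unbound64() is just a name invented for this sketch):

static bool is_spilled_unbound64(struct bpf_verifier_env *env,
                                 struct bpf_stack_state *stack)
{
        /* either a spilled unbound 64-bit scalar, or a slot filled with
         * a mix of STACK_MISC/STACK_ZERO (and STACK_INVALID, if allowed)
         */
        return is_spilled_unbound_scalar_reg64(stack) ||
               is_stack_unbound_slot64(env, stack);
}

and then in the stacksafe() loop:

        struct bpf_reg_state *scalar_old, *scalar_cur;
        ...
        scalar_old = scalar_cur = NULL;
        if (is_spilled_unbound64(env, &old->stack[spi]))
                scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
                             &old->stack[spi].spilled_ptr : &unbound_reg;
        if (is_spilled_unbound64(env, &cur->stack[spi]))
                scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
                             &cur->stack[spi].spilled_ptr : &unbound_reg;
        if (scalar_old && scalar_cur) {
                if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
                        return false;
                i += BPF_REG_SIZE - 1;
                continue;
        }

The zero case could get the same treatment and, per the above, would
probably have to be checked before this block, since an all-STACK_ZERO
slot also satisfies is_stack_unbound_slot64().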