Re: [PATCH bpf-next v8 8/9] bpf: Support private stack for struct_ops progs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Oct 31, 2024 at 8:10 PM Yonghong Song <yonghong.song@xxxxxxxxx> wrote:
>
> For struct_ops progs, whether a particular prog will use private stack
> or not (prog->aux->use_priv_stack) will be set before actual insn-level
> verification for that prog. One particular implementation is to
> piggyback on struct_ops->check_member(). The next patch will have an
> example for this. The struct_ops->check_member() will set
> prog->aux->use_priv_stack to true, which enables private stack
> usage while ignoring the BPF_PRIV_STACK_MIN_SIZE limit.
>
> If use_priv_stack is true for a particular struct_ops prog, bpf
> trampoline will need to do recursion checks (one level at this point)
> to avoid stack overwrite. A field (recursion_skipped()) is added to
> bpf_prog_aux structure such that if bpf_prog->aux->recursion_skipped
> is set by the struct_ops subsystem, the function will be called
> to terminate the prog run, collect related info, etc.
>
> Acked-by: Tejun Heo <tj@xxxxxxxxxx>
> Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
> ---
>  include/linux/bpf.h          |  1 +
>  include/linux/bpf_verifier.h |  1 +
>  kernel/bpf/trampoline.c      |  4 ++++
>  kernel/bpf/verifier.c        | 36 ++++++++++++++++++++++++++++++++----
>  4 files changed, 38 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 8a3ea7440a4a..7a34108c6974 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1528,6 +1528,7 @@ struct bpf_prog_aux {
>         u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
>         struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
>         struct bpf_arena *arena;
> +       void (*recursion_skipped)(struct bpf_prog *prog); /* callback if recursion is skipped */

The name doesn't fit.
The recursion wasn't skipped.
It's the execution of the program that was skipped.
'recursion_detected' or 'recursion_disallowed' would be a better name.

>         /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>         const struct btf_type *attach_func_proto;
>         /* function name for valid attach_btf_id */
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index bc28ce7996ac..ff0fba935f89 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -889,6 +889,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
>         case BPF_PROG_TYPE_TRACING:
>                 return prog->expected_attach_type != BPF_TRACE_ITER;
>         case BPF_PROG_TYPE_STRUCT_OPS:
> +               return prog->aux->use_priv_stack;
>         case BPF_PROG_TYPE_LSM:
>                 return false;
>         default:
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 9f36c049f4c2..a84e60efbf89 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -899,6 +899,8 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
>
>         if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
>                 bpf_prog_inc_misses_counter(prog);
> +               if (prog->aux->recursion_skipped)
> +                       prog->aux->recursion_skipped(prog);
>                 return 0;
>         }
>         return bpf_prog_start_time();
> @@ -975,6 +977,8 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
>
>         if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
>                 bpf_prog_inc_misses_counter(prog);
> +               if (prog->aux->recursion_skipped)
> +                       prog->aux->recursion_skipped(prog);
>                 return 0;
>         }
>         return bpf_prog_start_time();
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 30e74db6a85f..865191c5d21b 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6023,17 +6023,31 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
>
>  static int bpf_enable_priv_stack(struct bpf_verifier_env *env)
>  {
> +       bool force_priv_stack = env->prog->aux->use_priv_stack;
>         struct bpf_subprog_info *si;
> +       int ret;
> +
> +       if (!bpf_jit_supports_private_stack()) {
> +               if (force_priv_stack) {
> +                       verbose(env, "Private stack not supported by jit\n");
> +                       return -EACCES;
> +               }

This logic would fit better in the patch 2.
Less code churn and the whole approach is easier to understand.

I don't like this inband signaling.
Now I see why you had that weird <0 check in patch 2 :(
This is ugly.
Maybe it should be a separate bool request_priv_stack:1
that struct_ops callback will set and it will clean up
this logic.

>
> -       if (!bpf_jit_supports_private_stack())
>                 return NO_PRIV_STACK;
> +       }
>
> +       ret = PRIV_STACK_ADAPTIVE;
>         switch (env->prog->type) {
>         case BPF_PROG_TYPE_KPROBE:
>         case BPF_PROG_TYPE_TRACEPOINT:
>         case BPF_PROG_TYPE_PERF_EVENT:
>         case BPF_PROG_TYPE_RAW_TRACEPOINT:
>                 break;
> +       case BPF_PROG_TYPE_STRUCT_OPS:
> +               if (!force_priv_stack)
> +                       return NO_PRIV_STACK;
> +               ret = PRIV_STACK_ALWAYS;
> +               break;
>         case BPF_PROG_TYPE_TRACING:
>                 if (env->prog->expected_attach_type != BPF_TRACE_ITER)
>                         break;
> @@ -6044,11 +6058,18 @@ static int bpf_enable_priv_stack(struct bpf_verifier_env *env)
>
>         si = env->subprog_info;
>         for (int i = 0; i < env->subprog_cnt; i++) {
> -               if (si[i].has_tail_call)
> +               if (si[i].has_tail_call) {
> +                       if (ret == PRIV_STACK_ALWAYS) {
> +                               verbose(env,
> +                                       "Private stack not supported due to tail call presence\n");
> +                               return -EACCES;

> +                       }
> +
>                         return NO_PRIV_STACK;
> +               }
>         }
>
> -       return PRIV_STACK_ADAPTIVE;
> +       return ret;
>  }
>
>  static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
> @@ -6121,7 +6142,8 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
>                                         idx, subprog_depth);
>                                 return -EACCES;
>                         }
> -                       if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) {
> +                       if (priv_stack_supported == PRIV_STACK_ALWAYS ||
> +                           subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) {
>                                 subprog[idx].use_priv_stack = true;
>                                 subprog_visited[idx] = 1;
>                         }
> @@ -6271,6 +6293,12 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
>                                 depth_frame, subtree_depth);
>                         return -EACCES;
>                 }
> +               if (orig_priv_stack_supported == PRIV_STACK_ALWAYS) {
> +                       verbose(env,
> +                               "Private stack not supported due to possible nested subprog run\n");
> +                       ret = -EACCES;
> +                       goto out;
> +               }
>                 if (orig_priv_stack_supported == PRIV_STACK_ADAPTIVE) {
>                         for (int i = 0; i < env->subprog_cnt; i++)
>                                 si[i].use_priv_stack = false;
> --
> 2.43.5
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux