Re: [PATCH bpf-next v3 4/5] bpf, x86: Add jit support for private stack

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Sep 26, 2024 at 4:45 PM Yonghong Song <yonghong.song@xxxxxxxxx> wrote:
>
> Add jit support for private stack. For a particular subtree, e.g.,
>   subtree_root <== stack depth 120
>    subprog1    <== stack depth 80
>     subprog2   <== stack depth 40
>    subprog3    <== stack depth 160
>
> Let us say that private_stack_ptr is the memory address allocated for
> private stack. The frame pointer for each above is calculated like below:
>   subtree_root  <== subtree_root_fp = private_stack_ptr + 120
>    subprog1     <== subtree_subprog1_fp = subtree_root_fp + 80
>     subprog2    <== subtree_subprog2_fp = subtree_subprog1_fp + 40
>    subprog3     <== subtree_subprog1_fp = subtree_root_fp + 160
>
> For any function call to helper/kfunc, push/pop prog frame pointer
> is needed in order to preserve frame pointer value.
>
> To deal with exception handling, push/pop frame pointer is also used
> surrounding call to subsequent subprog. For example,
>   subtree_root
>    subprog1
>      ...
>      insn: call bpf_throw
>      ...
>
> After jit, we will have
>   subtree_root
>    insn: push r9
>    subprog1
>      ...
>      insn: push r9
>      insn: call bpf_throw
>      insn: pop r9
>      ...
>    insn: pop r9
>
>   exception_handler
>      pop r9
>      ...
> where r9 represents the fp for each subprog.

Kumar,
please review the interaction of priv_stack with exceptions.

>
> Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
> ---
>  arch/x86/net/bpf_jit_comp.c | 87 ++++++++++++++++++++++++++++++++++---
>  1 file changed, 81 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 06b080b61aa5..c264822c926b 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -325,6 +325,22 @@ struct jit_context {
>  /* Number of bytes that will be skipped on tailcall */
>  #define X86_TAIL_CALL_OFFSET   (12 + ENDBR_INSN_SIZE)
>
> +static void push_r9(u8 **pprog)
> +{
> +       u8 *prog = *pprog;
> +
> +       EMIT2(0x41, 0x51);   /* push r9 */
> +       *pprog = prog;
> +}
> +
> +static void pop_r9(u8 **pprog)
> +{
> +       u8 *prog = *pprog;
> +
> +       EMIT2(0x41, 0x59);   /* pop r9 */
> +       *pprog = prog;
> +}
> +
>  static void push_r12(u8 **pprog)
>  {
>         u8 *prog = *pprog;
> @@ -491,7 +507,7 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
>   */
>  static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>                           bool tail_call_reachable, bool is_subprog,
> -                         bool is_exception_cb)
> +                         bool is_exception_cb, enum bpf_pstack_state  pstack)

enum bpf_priv_stack_mode priv_stack_mode

>  {
>         u8 *prog = *pprog;
>
> @@ -518,6 +534,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>                  * first restore those callee-saved regs from stack, before
>                  * reusing the stack frame.
>                  */
> +               if (pstack)
> +                       pop_r9(&prog);

This is an unnecessary cognitive load, since readers
need to remember absolute values of enum.
Just use
if (priv_stack_mode != NO_PRIV_STACK)

>                 pop_callee_regs(&prog, all_callee_regs_used);
>                 pop_r12(&prog);
>                 /* Reset the stack frame. */
> @@ -1404,6 +1422,22 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
>         *pprog = prog;
>  }
>
> +static void emit_private_frame_ptr(u8 **pprog, void *private_frame_ptr)
> +{
> +       u8 *prog = *pprog;
> +
> +       /* movabs r9, private_frame_ptr */
> +       emit_mov_imm64(&prog, X86_REG_R9, (long) private_frame_ptr >> 32,
> +                      (u32) (long) private_frame_ptr);
> +
> +       /* add <r9>, gs:[<off>] */
> +       EMIT2(0x65, 0x4c);
> +       EMIT3(0x03, 0x0c, 0x25);
> +       EMIT((u32)(unsigned long)&this_cpu_off, 4);
> +
> +       *pprog = prog;
> +}
> +
>  #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
>
>  #define __LOAD_TCC_PTR(off)                    \
> @@ -1421,20 +1455,31 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>         int insn_cnt = bpf_prog->len;
>         bool seen_exit = false;
>         u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
> +       void __percpu *private_frame_ptr = NULL;
>         u64 arena_vm_start, user_vm_start;
> +       u32 orig_stack_depth, stack_depth;
>         int i, excnt = 0;
>         int ilen, proglen = 0;
>         u8 *prog = temp;
>         int err;
>
> +       stack_depth = bpf_prog->aux->stack_depth;
> +       orig_stack_depth = round_up(stack_depth, 8);
> +       if (bpf_prog->pstack) {
> +               stack_depth = 0;
> +               if (bpf_prog->pstack == PSTACK_TREE_ROOT)
> +                       private_frame_ptr = bpf_prog->private_stack_ptr + orig_stack_depth;
> +       }

Same issue.
switch (priv_stack_mode) {
case PRIV_STACK_MAIN_PROG:
    priv_frame_ptr = bpf_prog->priv_stack_ptr + orig_stack_depth;
    fallthrough;
case PRIV_STACK_SUB_PROG:
    stack_depth = 0;
    break;
}

would be easier to read.

> +
>         arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
>         user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
>
>         detect_reg_usage(insn, insn_cnt, callee_regs_used);
>
> -       emit_prologue(&prog, bpf_prog->aux->stack_depth,
> +       emit_prologue(&prog, stack_depth,
>                       bpf_prog_was_classic(bpf_prog), tail_call_reachable,
> -                     bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
> +                     bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb,
> +                     bpf_prog->pstack);
>         /* Exception callback will clobber callee regs for its own use, and
>          * restore the original callee regs from main prog's stack frame.
>          */
> @@ -1454,6 +1499,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>                 emit_mov_imm64(&prog, X86_REG_R12,
>                                arena_vm_start >> 32, (u32) arena_vm_start);
>
> +       if (bpf_prog->pstack == PSTACK_TREE_ROOT) {
> +               emit_private_frame_ptr(&prog, private_frame_ptr);
> +       } else if (bpf_prog->pstack == PSTACK_TREE_INTERNAL  && orig_stack_depth) {
> +               /* r9 += orig_stack_depth */
> +               maybe_emit_1mod(&prog, X86_REG_R9, true);
> +               if (is_imm8(orig_stack_depth))
> +                       EMIT3(0x83, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
> +               else
> +                       EMIT2_off32(0x81, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
> +       }

We've been open coding 'add' insn like this for way too long.
Let's address this technical debt now.
Please move
                case BPF_ALU | BPF_ADD | BPF_K:
                case BPF_ALU | BPF_SUB | BPF_K:
                case BPF_ALU | BPF_AND | BPF_K:
                case BPF_ALU | BPF_OR | BPF_K:
                case BPF_ALU | BPF_XOR | BPF_K:
                case BPF_ALU64 | BPF_ADD | BPF_K:
                case BPF_ALU64 | BPF_SUB | BPF_K:
                case BPF_ALU64 | BPF_AND | BPF_K:
                case BPF_ALU64 | BPF_OR | BPF_K:
                case BPF_ALU64 | BPF_XOR | BPF_K:
into helpers and use it here.





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux