Re: [PATCH bpf-next v2 2/4] bpf, x86: Create bpf_trace_run_ctx on the caller thread's stack

On Fri, 2022-03-18 at 12:09 -0700, Alexei Starovoitov wrote:
> On Tue, Mar 15, 2022 at 05:42:29PM -0700, Kui-Feng Lee wrote:
> > BPF trampolines will create a bpf_trace_run_ctx on their stacks, and
> > set/reset the current bpf_run_ctx whenever calling/returning from a
> > bpf_prog.
> > 
> > Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxx>
> > ---
> >  arch/x86/net/bpf_jit_comp.c | 32 ++++++++++++++++++++++++++++++++
> >  include/linux/bpf.h         | 12 ++++++++----
> >  kernel/bpf/syscall.c        |  4 ++--
> >  kernel/bpf/trampoline.c     | 21 +++++++++++++++++----
> >  4 files changed, 59 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index 1228e6e6a420..29775a475513 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -1748,10 +1748,33 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> >  {
> >         u8 *prog = *pprog;
> >         u8 *jmp_insn;
> > +       int ctx_cookie_off = offsetof(struct bpf_trace_run_ctx, bpf_cookie);
> >         struct bpf_prog *p = l->prog;
> >  
> > +       EMIT1(0x52);             /* push rdx */
> 
> Why save/restore rdx?

> 
> > +
> > +       /* mov rdi, 0 */
> > +       emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
> > +
> > +       /* Prepare struct bpf_trace_run_ctx.
> > +        * sub rsp, sizeof(struct bpf_trace_run_ctx)
> > +        * mov rax, rsp
> > +        * mov QWORD PTR [rax + ctx_cookie_off], rdi
> > +        */
> 
> How about the following instead:
> sub rsp, sizeof(struct bpf_trace_run_ctx)
> mov qword ptr [rsp + ctx_cookie_off], 0
> ?

AFAIK, rsp cannot be encoded as the base register in the plain
base + displacement addressing mode; it can only be addressed through
the base + index + displacement (SIB) encoding.
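
FWIW, if we go that route, the store could be emitted through the SIB
form with the existing EMIT macros -- a rough sketch (untested; the
0x24 SIB byte encodes base=rsp with no index, and ctx_cookie_off is
assumed to fit in a disp8):

	/* mov qword ptr [rsp + ctx_cookie_off], rdi */
	EMIT4(0x48, 0x89, 0x7C, 0x24);	/* REX.W + mov, ModRM=0x7c, SIB=0x24 (base=rsp) */
	EMIT1(ctx_cookie_off);		/* disp8 */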

> 
> > +       EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_trace_run_ctx));
> > +       EMIT3(0x48, 0x89, 0xE0);
> > +       EMIT4(0x48, 0x89, 0x78, ctx_cookie_off);
> > +
> > +       /* mov rdi, rsp */
> > +       EMIT3(0x48, 0x89, 0xE7);
> > +       /* mov QWORD PTR [rdi + sizeof(struct bpf_trace_run_ctx)], rax */
> > +       emit_stx(&prog, BPF_DW, BPF_REG_1, BPF_REG_0, sizeof(struct bpf_trace_run_ctx));
> 
> why not to do:
> mov qword ptr[rsp + sizeof(struct bpf_trace_run_ctx)], rsp
> ?

The same reason as above.

> 
> > +
> >         /* arg1: mov rdi, progs[i] */
> >         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> > +       /* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
> > +       EMIT3(0x48, 0x89, 0xE6);
> > +
> >         if (emit_call(&prog,
> >                       p->aux->sleepable ? __bpf_prog_enter_sleepable :
> >                       __bpf_prog_enter, prog))
> > @@ -1797,11 +1820,20 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> >         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> >         /* arg2: mov rsi, rbx <- start time in nsec */
> >         emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> > +       /* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
> > +       EMIT3(0x48, 0x89, 0xE2);
> >         if (emit_call(&prog,
> >                       p->aux->sleepable ? __bpf_prog_exit_sleepable :
> >                       __bpf_prog_exit, prog))
> >                         return -EINVAL;
> >  
> > +       /* pop struct bpf_trace_run_ctx
> > +        * add rsp, sizeof(struct bpf_trace_run_ctx)
> > +        */
> > +       EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_trace_run_ctx));
> 
> the sub rsp; add rsp pair for every prog call will add up.
> Could you do sub rsp once at the beginning of trampoline?
> And move
> mov qword ptr[rsp + sizeof(struct bpf_trace_run_ctx)], rsp
> to the beginning as well?

Ok! I will move this part.
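
Something like this in arch_prepare_bpf_trampoline(), before the calls
into invoke_bpf_prog() -- a rough sketch (untested, exact placement and
offsets TBD):

	/* Reserve one bpf_trace_run_ctx for the whole trampoline:
	 * sub rsp, sizeof(struct bpf_trace_run_ctx)
	 */
	EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_trace_run_ctx));

with the matching "add rsp, sizeof(struct bpf_trace_run_ctx)" emitted
once in the epilogue, so each prog call only passes the pointer.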

> 
> > +
> > +       EMIT1(0x5A); /* pop rdx */
> > +
> >         *pprog = prog;
> >         return 0;
> >  }
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 3dcae8550c21..d20a23953696 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -681,6 +681,8 @@ struct bpf_tramp_links {
> >         int nr_links;
> >  };
> >  
> > +struct bpf_trace_run_ctx;
> > +
> >  /* Different use cases for BPF trampoline:
> >   * 1. replace nop at the function entry (kprobe equivalent)
> >   *    flags = BPF_TRAMP_F_RESTORE_REGS
> > @@ -707,10 +709,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
> >                                 struct bpf_tramp_links *tlinks,
> >                                 void *orig_call);
> >  /* these two functions are called from generated trampoline */
> > -u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
> > -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
> > -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
> > -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
> > +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
> > +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx);
> > +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
> > +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> > +                                      struct bpf_trace_run_ctx *run_ctx);
> >  void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
> >  void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
> >  
> > @@ -1291,6 +1294,7 @@ struct bpf_cg_run_ctx {
> >  struct bpf_trace_run_ctx {
> >         struct bpf_run_ctx run_ctx;
> >         u64 bpf_cookie;
> > +       struct bpf_run_ctx *saved_run_ctx;
> >  };
> >  
> >  static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
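
(Side note: with saved_run_ctx recorded, a helper in a later patch can
recover the cookie from current->bpf_ctx the same way the existing
bpf_cg_run_ctx users do -- a hypothetical sketch, not part of this
patch:

	struct bpf_trace_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx,
			       run_ctx);
	return run_ctx->bpf_cookie;
)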
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index fecfc803785d..a289ef55ea17 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -4793,13 +4793,13 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
> >                         return -EINVAL;
> >                 }
> >  
> > -               if (!__bpf_prog_enter_sleepable(prog)) {
> > +               if (!__bpf_prog_enter_sleepable(prog, NULL)) {
> >                         /* recursion detected */
> >                         bpf_prog_put(prog);
> >                         return -EBUSY;
> >                 }
> >                 attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
> > -               __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
> > +               __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, NULL);
> >                 bpf_prog_put(prog);
> >                 return 0;
> >  #endif
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index 54c695d49ec9..0b050aa2f159 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> > @@ -580,9 +580,12 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
> >   * [2..MAX_U64] - execute bpf prog and record execution time.
> >   *     This is start time.
> >   */
> > -u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
> > +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
> >         __acquires(RCU)
> >  {
> > +       if (run_ctx)
> > +               run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
> 
> We can remove these branches from the critical path if we pass an
> actual run_ctx instead of NULL in bpf_sys_bpf, right?

Yes!
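
For example, bpf_sys_bpf could keep the run_ctx on its own stack so the
NULL checks go away entirely -- a rough sketch of what the next
revision could do (untested):

	struct bpf_trace_run_ctx run_ctx = {};

	if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
		/* recursion detected */
		bpf_prog_put(prog);
		return -EBUSY;
	}
	attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
	__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);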

> 
> > +
> >         rcu_read_lock();
> >         migrate_disable();
> >         if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
> > @@ -614,17 +617,23 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
> >         }
> >  }
> >  
> > -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
> > +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx)
> >         __releases(RCU)
> >  {
> > +       if (run_ctx)
> > +               bpf_reset_run_ctx(run_ctx->saved_run_ctx);
> > +
> >         update_prog_stats(prog, start);
> >         __this_cpu_dec(*(prog->active));
> >         migrate_enable();
> >         rcu_read_unlock();
> >  }
> >  
> > -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
> > +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
> >  {
> > +       if (run_ctx)
> > +               run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
> > +
> >         rcu_read_lock_trace();
> >         migrate_disable();
> >         might_fault();
> > @@ -635,8 +644,12 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
> >         return bpf_prog_start_time();
> >  }
> >  
> > -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
> > +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> > +                                      struct bpf_trace_run_ctx *run_ctx)
> >  {
> > +       if (run_ctx)
> > +               bpf_reset_run_ctx(run_ctx->saved_run_ctx);
> > +
> >         update_prog_stats(prog, start);
> >         __this_cpu_dec(*(prog->active));
> >         migrate_enable();
> > -- 
> > 2.30.2
> > 
> 




