On Fri, 2022-03-18 at 12:09 -0700, Alexei Starovoitov wrote:
> On Tue, Mar 15, 2022 at 05:42:29PM -0700, Kui-Feng Lee wrote:
> > BPF trampolines will create a bpf_trace_run_ctx on their stacks, and
> > set/reset the current bpf_run_ctx whenever calling/returning from a
> > bpf_prog.
> > 
> > Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxx>
> > ---
> >  arch/x86/net/bpf_jit_comp.c | 32 ++++++++++++++++++++++++++++++++
> >  include/linux/bpf.h         | 12 ++++++++----
> >  kernel/bpf/syscall.c        |  4 ++--
> >  kernel/bpf/trampoline.c     | 21 +++++++++++++++++----
> >  4 files changed, 59 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index 1228e6e6a420..29775a475513 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -1748,10 +1748,33 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> >  {
> >  	u8 *prog = *pprog;
> >  	u8 *jmp_insn;
> > +	int ctx_cookie_off = offsetof(struct bpf_trace_run_ctx, bpf_cookie);
> >  	struct bpf_prog *p = l->prog;
> >  
> > +	EMIT1(0x52);		/* push rdx */
> 
> Why save/restore rdx?
> 
> > +
> > +	/* mov rdi, 0 */
> > +	emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
> > +
> > +	/* Prepare struct bpf_trace_run_ctx.
> > +	 * sub rsp, sizeof(struct bpf_trace_run_ctx)
> > +	 * mov rax, rsp
> > +	 * mov QWORD PTR [rax + ctx_cookie_off], rdi
> > +	 */
> 
> How about the following instead:
>   sub rsp, sizeof(struct bpf_trace_run_ctx)
>   mov qword ptr [rsp + ctx_cookie_off], 0
> ?

AFAIK, rsp cannot be encoded as the base register of the plain ModR/M
base + displacement addressing mode; [rsp + disp] requires a SIB byte,
i.e. the base + index + displacement encoding.

> > +	EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_trace_run_ctx));
> > +	EMIT3(0x48, 0x89, 0xE0);
> > +	EMIT4(0x48, 0x89, 0x78, ctx_cookie_off);
> > +
> > +	/* mov rdi, rsp */
> > +	EMIT3(0x48, 0x89, 0xE7);
> > +	/* mov QWORD PTR [rdi + sizeof(struct bpf_trace_run_ctx)], rax */
> > +	emit_stx(&prog, BPF_DW, BPF_REG_1, BPF_REG_0, sizeof(struct bpf_trace_run_ctx));
> 
> why not to do:
>   mov qword ptr [rsp + sizeof(struct bpf_trace_run_ctx)], rsp
> ?

The same reason as above.

> > +
> >  	/* arg1: mov rdi, progs[i] */
> >  	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> > +	/* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
> > +	EMIT3(0x48, 0x89, 0xE6);
> > +
> >  	if (emit_call(&prog,
> >  		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
> >  		      __bpf_prog_enter, prog))
> > @@ -1797,11 +1820,20 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> >  	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> >  	/* arg2: mov rsi, rbx <- start time in nsec */
> >  	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> > +	/* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
> > +	EMIT3(0x48, 0x89, 0xE2);
> >  	if (emit_call(&prog,
> >  		      p->aux->sleepable ? __bpf_prog_exit_sleepable :
> >  		      __bpf_prog_exit, prog))
> >  		return -EINVAL;
> >  
> > +	/* pop struct bpf_trace_run_ctx
> > +	 * add rsp, sizeof(struct bpf_trace_run_ctx)
> > +	 */
> > +	EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_trace_run_ctx));
> 
> the sub rsp; add rsp pair for every prog call will add up.
> Could you do sub rsp once at the beginning of trampoline?
> And move
>   mov qword ptr [rsp + sizeof(struct bpf_trace_run_ctx)], rsp
> to the beginning as well?

Ok! I will move this part.
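For example, something along these lines (an untested sketch only; the
final layout and offsets still need to be worked out). Reserve the
run_ctx once in the trampoline prologue:

	/* sub rsp, sizeof(struct bpf_trace_run_ctx) */
	EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_trace_run_ctx));
	/* mov qword ptr [rsp + ctx_cookie_off], rdi   (rdi holds 0 here)
	 * rsp as a base always takes the SIB form: ModR/M r/m=100 selects
	 * a SIB byte, and SIB 0x24 means "base = rsp, no index".
	 */
	EMIT4(0x48, 0x89, 0x7C, 0x24);
	EMIT1(ctx_cookie_off);

and release it once in the epilogue:

	/* add rsp, sizeof(struct bpf_trace_run_ctx) */
	EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_trace_run_ctx));

so invoke_bpf_prog() no longer needs the per-call sub/add pair.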
> 
> > +
> > +	EMIT1(0x5A);		/* pop rdx */
> > +
> >  	*pprog = prog;
> >  	return 0;
> >  }
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 3dcae8550c21..d20a23953696 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -681,6 +681,8 @@ struct bpf_tramp_links {
> >  	int nr_links;
> >  };
> >  
> > +struct bpf_trace_run_ctx;
> > +
> >  /* Different use cases for BPF trampoline:
> >   * 1. replace nop at the function entry (kprobe equivalent)
> >   *    flags = BPF_TRAMP_F_RESTORE_REGS
> > @@ -707,10 +709,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
> >  				struct bpf_tramp_links *tlinks,
> >  				void *orig_call);
> >  /* these two functions are called from generated trampoline */
> > -u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
> > -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
> > -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
> > -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
> > +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
> > +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx);
> > +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
> > +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> > +				       struct bpf_trace_run_ctx *run_ctx);
> >  void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
> >  void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
> >  
> > @@ -1291,6 +1294,7 @@ struct bpf_cg_run_ctx {
> >  struct bpf_trace_run_ctx {
> >  	struct bpf_run_ctx run_ctx;
> >  	u64 bpf_cookie;
> > +	struct bpf_run_ctx *saved_run_ctx;
> >  };
> >  
> >  static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index fecfc803785d..a289ef55ea17 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -4793,13 +4793,13 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
> >  			return -EINVAL;
> >  		}
> >  
> > -		if (!__bpf_prog_enter_sleepable(prog)) {
> > +		if (!__bpf_prog_enter_sleepable(prog, NULL)) {
> >  			/* recursion detected */
> >  			bpf_prog_put(prog);
> >  			return -EBUSY;
> >  		}
> >  		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
> > -		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
> > +		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, NULL);
> >  		bpf_prog_put(prog);
> >  		return 0;
> >  #endif
> > diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> > index 54c695d49ec9..0b050aa2f159 100644
> > --- a/kernel/bpf/trampoline.c
> > +++ b/kernel/bpf/trampoline.c
> > @@ -580,9 +580,12 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
> >   * [2..MAX_U64] - execute bpf prog and record execution time.
> >   *   This is start time.
> >   */
> > -u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
> > +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
> >  	__acquires(RCU)
> >  {
> > +	if (run_ctx)
> > +		run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
> 
> We can remove these branches from critical path if we pass actual run_ctx
> instead of NULL in bpf_sys_bpf, right?

Yes!
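Something like this in bpf_sys_bpf() (sketch, untested):

		struct bpf_trace_run_ctx run_ctx = {};

		if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
			/* recursion detected */
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */,
					  &run_ctx);

Then the if (run_ctx) checks in __bpf_prog_enter*() and
__bpf_prog_exit*() can be dropped entirely.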
> 
> > +
> >  	rcu_read_lock();
> >  	migrate_disable();
> >  	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
> > @@ -614,17 +617,23 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
> >  	}
> >  }
> >  
> > -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
> > +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx)
> >  	__releases(RCU)
> >  {
> > +	if (run_ctx)
> > +		bpf_reset_run_ctx(run_ctx->saved_run_ctx);
> > +
> >  	update_prog_stats(prog, start);
> >  	__this_cpu_dec(*(prog->active));
> >  	migrate_enable();
> >  	rcu_read_unlock();
> >  }
> >  
> > -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
> > +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
> >  {
> > +	if (run_ctx)
> > +		run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
> > +
> >  	rcu_read_lock_trace();
> >  	migrate_disable();
> >  	might_fault();
> > @@ -635,8 +644,12 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
> >  	return bpf_prog_start_time();
> >  }
> >  
> > -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
> > +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> > +				       struct bpf_trace_run_ctx *run_ctx)
> >  {
> > +	if (run_ctx)
> > +		bpf_reset_run_ctx(run_ctx->saved_run_ctx);
> > +
> >  	update_prog_stats(prog, start);
> >  	__this_cpu_dec(*(prog->active));
> >  	migrate_enable();
> > -- 
> > 2.30.2
> > 
> 
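For reference, the save/restore above relies on the existing helpers in
include/linux/bpf.h, which simply swap current->bpf_ctx (simplified here,
omitting the CONFIG_BPF_SYSCALL guards):

	static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
	{
		struct bpf_run_ctx *old_ctx = current->bpf_ctx;

		current->bpf_ctx = new_ctx;
		return old_ctx;
	}

	static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
	{
		current->bpf_ctx = old_ctx;
	}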