On Tue, Mar 15, 2022 at 5:44 PM Kui-Feng Lee <kuifeng@xxxxxx> wrote: > > BPF trampolines will create a bpf_trace_run_ctx on their stacks, and > set/reset the current bpf_run_ctx whenever calling/returning from a > bpf_prog. > > Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxx> > --- > arch/x86/net/bpf_jit_comp.c | 32 ++++++++++++++++++++++++++++++++ > include/linux/bpf.h | 12 ++++++++---- > kernel/bpf/syscall.c | 4 ++-- > kernel/bpf/trampoline.c | 21 +++++++++++++++++---- > 4 files changed, 59 insertions(+), 10 deletions(-) > > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c > index 1228e6e6a420..29775a475513 100644 > --- a/arch/x86/net/bpf_jit_comp.c > +++ b/arch/x86/net/bpf_jit_comp.c > @@ -1748,10 +1748,33 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, > { > u8 *prog = *pprog; > u8 *jmp_insn; > + int ctx_cookie_off = offsetof(struct bpf_trace_run_ctx, bpf_cookie); > struct bpf_prog *p = l->prog; > > + EMIT1(0x52); /* push rdx */ > + > + /* mov rdi, 0 */ > + emit_mov_imm64(&prog, BPF_REG_1, 0, 0); > + > + /* Prepare struct bpf_trace_run_ctx. > + * sub rsp, sizeof(struct bpf_trace_run_ctx) > + * mov rax, rsp > + * mov QWORD PTR [rax + ctx_cookie_off], rdi > + */ > + EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_trace_run_ctx)); > + EMIT3(0x48, 0x89, 0xE0); > + EMIT4(0x48, 0x89, 0x78, ctx_cookie_off); > + > + /* mov rdi, rsp */ > + EMIT3(0x48, 0x89, 0xE7); > + /* mov QWORD PTR [rdi + sizeof(struct bpf_trace_run_ctx)], rax */ > + emit_stx(&prog, BPF_DW, BPF_REG_1, BPF_REG_0, sizeof(struct bpf_trace_run_ctx)); > + > /* arg1: mov rdi, progs[i] */ > emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); > + /* arg2: mov rsi, rsp (struct bpf_run_ctx *) */ > + EMIT3(0x48, 0x89, 0xE6); > + > if (emit_call(&prog, > p->aux->sleepable ? 
__bpf_prog_enter_sleepable : > __bpf_prog_enter, prog)) > @@ -1797,11 +1820,20 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, > emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); > /* arg2: mov rsi, rbx <- start time in nsec */ > emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); > + /* arg3: mov rdx, rsp (struct bpf_run_ctx *) */ > + EMIT3(0x48, 0x89, 0xE2); > if (emit_call(&prog, > p->aux->sleepable ? __bpf_prog_exit_sleepable : > __bpf_prog_exit, prog)) > return -EINVAL; > > + /* pop struct bpf_trace_run_ctx > + * add rsp, sizeof(struct bpf_trace_run_ctx) > + */ > + EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_trace_run_ctx)); > + > + EMIT1(0x5A); /* pop rdx */ > + > *pprog = prog; > return 0; > } > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 3dcae8550c21..d20a23953696 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -681,6 +681,8 @@ struct bpf_tramp_links { > int nr_links; > }; > > +struct bpf_trace_run_ctx; > + > /* Different use cases for BPF trampoline: > * 1. 
replace nop at the function entry (kprobe equivalent) > * flags = BPF_TRAMP_F_RESTORE_REGS > @@ -707,10 +709,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i > struct bpf_tramp_links *tlinks, > void *orig_call); > /* these two functions are called from generated trampoline */ > -u64 notrace __bpf_prog_enter(struct bpf_prog *prog); > -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); > -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); > -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); > +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx); > +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx); > +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx); > +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, > + struct bpf_trace_run_ctx *run_ctx); > void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); > void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); > > @@ -1291,6 +1294,7 @@ struct bpf_cg_run_ctx { > struct bpf_trace_run_ctx { > struct bpf_run_ctx run_ctx; > u64 bpf_cookie; > + struct bpf_run_ctx *saved_run_ctx; > }; Oh, and bpf_trace_run_ctx is used for kprobe/uprobe/tracepoint, so let's add a new struct bpf_tramp_run_ctx, which would reflect that it is used for BPF trampoline-based BPF programs. Otherwise it's confusing to have saved_run_ctx for kprobe, where we don't use it. Similarly, if we move the "start" timestamp, it will be a bit off. Not the end of the world, but I think keeping them separate would make sense over the long run. > > static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) [...]