BPF trampolines will create a bpf_trace_run_ctx on their stacks, and
set/reset the current bpf_run_ctx whenever calling into or returning
from a bpf_prog.  Publishing the run_ctx this way makes the
trampoline's bpf_cookie slot reachable from inside the program, and
the new saved_run_ctx field preserves the previous bpf_run_ctx so
that nested invocations restore it correctly on exit.

Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxx>
---
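
Note for reviewers, not meant for the commit log: the instruction
sequence emitted by invoke_bpf_prog() below is roughly equivalent to
the C sketch that follows.  run_tramp_prog() is a made-up name used
purely for illustration, and this patch always writes 0 into the
cookie slot; storing a real per-link cookie value is left to a
separate change.

    /* Hypothetical helper mirroring what the generated trampoline
     * does around each program.  run_ctx lives in the trampoline's
     * stack frame, like the rsp-relative slot carved out below.
     * Sleepable progs would use the _sleepable enter/exit variants.
     */
    static u64 run_tramp_prog(struct bpf_prog *prog, void *args)
    {
            struct bpf_trace_run_ctx run_ctx = { .bpf_cookie = 0 };
            u64 start;

            /* publishes &run_ctx.run_ctx as the current bpf_run_ctx
             * and saves the previous one in run_ctx.saved_run_ctx
             */
            start = __bpf_prog_enter(prog, &run_ctx);
            if (start)      /* zero means recursion was detected */
                    bpf_prog_run(prog, args);
            /* restores the previous bpf_run_ctx */
            __bpf_prog_exit(prog, start, &run_ctx);
            return start;
    }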
 arch/x86/net/bpf_jit_comp.c | 32 ++++++++++++++++++++++++++++++++
 include/linux/bpf.h         | 12 ++++++++----
 kernel/bpf/syscall.c        |  4 ++--
 kernel/bpf/trampoline.c     | 21 +++++++++++++++++----
 4 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 1228e6e6a420..29775a475513 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1748,10 +1748,33 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
+	int ctx_cookie_off = offsetof(struct bpf_trace_run_ctx, bpf_cookie);
 	struct bpf_prog *p = l->prog;
 
+	EMIT1(0x52);		/* push rdx */
+
+	/* mov rdi, 0 */
+	emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
+
+	/* Prepare struct bpf_trace_run_ctx.
+	 * sub rsp, sizeof(struct bpf_trace_run_ctx)
+	 * mov rax, rsp
+	 * mov QWORD PTR [rax + ctx_cookie_off], rdi
+	 */
+	EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_trace_run_ctx));
+	EMIT3(0x48, 0x89, 0xE0);
+	EMIT4(0x48, 0x89, 0x78, ctx_cookie_off);
+
+	/* mov rdi, rsp */
+	EMIT3(0x48, 0x89, 0xE7);
+	/* mov QWORD PTR [rdi + sizeof(struct bpf_trace_run_ctx)], rax */
+	emit_stx(&prog, BPF_DW, BPF_REG_1, BPF_REG_0, sizeof(struct bpf_trace_run_ctx));
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+	/* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
+	EMIT3(0x48, 0x89, 0xE6);
+
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
 		      __bpf_prog_enter, prog))
@@ -1797,11 +1820,20 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: mov rsi, rbx <- start time in nsec */
 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+	/* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
+	EMIT3(0x48, 0x89, 0xE2);
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_exit_sleepable :
 		      __bpf_prog_exit, prog))
 		return -EINVAL;
 
+	/* pop struct bpf_trace_run_ctx
+	 * add rsp, sizeof(struct bpf_trace_run_ctx)
+	 */
+	EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_trace_run_ctx));
+
+	EMIT1(0x5A);		/* pop rdx */
+
 	*pprog = prog;
 	return 0;
 }
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3dcae8550c21..d20a23953696 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -681,6 +681,8 @@ struct bpf_tramp_links {
 	int nr_links;
 };
 
+struct bpf_trace_run_ctx;
+
 /* Different use cases for BPF trampoline:
  * 1. replace nop at the function entry (kprobe equivalent)
  *    flags = BPF_TRAMP_F_RESTORE_REGS
@@ -707,10 +709,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
 				struct bpf_tramp_links *tlinks,
 				void *orig_call);
 /* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_trace_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 
@@ -1291,6 +1294,7 @@ struct bpf_cg_run_ctx {
 struct bpf_trace_run_ctx {
 	struct bpf_run_ctx run_ctx;
 	u64 bpf_cookie;
+	struct bpf_run_ctx *saved_run_ctx;
 };
 
 static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fecfc803785d..a289ef55ea17 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4793,13 +4793,13 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 			return -EINVAL;
 		}
 
-		if (!__bpf_prog_enter_sleepable(prog)) {
+		if (!__bpf_prog_enter_sleepable(prog, NULL)) {
 			/* recursion detected */
 			bpf_prog_put(prog);
 			return -EBUSY;
 		}
 		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
-		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
+		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, NULL);
 		bpf_prog_put(prog);
 		return 0;
 #endif
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 54c695d49ec9..0b050aa2f159 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -580,9 +580,12 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
 * [2..MAX_U64] - execute bpf prog and record execution time.
 *     This is start time.
 */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
 	__acquires(RCU)
 {
+	if (run_ctx)
+		run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	rcu_read_lock();
 	migrate_disable();
 	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
@@ -614,17 +617,23 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
 	}
 }
 
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx)
 	__releases(RCU)
 {
+	if (run_ctx)
+		bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }
 
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
 {
+	if (run_ctx)
+		run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
@@ -635,8 +644,12 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
 	return bpf_prog_start_time();
 }
 
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_trace_run_ctx *run_ctx)
 {
+	if (run_ctx)
+		bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();
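
The point of publishing the run_ctx through bpf_set_run_ctx(), which
swaps it into current->bpf_ctx, is that code running inside the
program can walk back to the trampoline's stack frame.  As a sketch
only, a consumer could look like the function below;
read_trampoline_cookie() is hypothetical, and the helper that
actually exposes the cookie to programs is not part of this patch:

    /* Hypothetical consumer.  Assumes it only ever runs under a
     * trampoline that went through __bpf_prog_enter() with a non-NULL
     * run_ctx, so current->bpf_ctx points into a bpf_trace_run_ctx.
     */
    static u64 read_trampoline_cookie(void)
    {
            struct bpf_trace_run_ctx *run_ctx;

            run_ctx = container_of(current->bpf_ctx,
                                   struct bpf_trace_run_ctx, run_ctx);
            return run_ctx->bpf_cookie;
    }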
-- 
2.30.2