This is a note to let you know that I've just added the patch titled bpf, arm64: Remove garbage frame for struct_ops trampoline to the 6.1-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: bpf-arm64-remove-garbage-frame-for-struct_ops-trampo.patch and it can be found in the queue-6.1 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit d945cd5b9b7d3ba8d8541e6df01b9bef10c36a7a Author: Xu Kuohai <xukuohai@xxxxxxxxxx> Date: Fri Oct 25 16:52:20 2024 +0800 bpf, arm64: Remove garbage frame for struct_ops trampoline [ Upstream commit 87cb58aebdf7005661a07e9fd5a900f924d48c75 ] The callsite layout for arm64 fentry is: mov x9, lr nop When a bpf prog is attached, the nop instruction is patched to a call to bpf trampoline: mov x9, lr bl <bpf trampoline> So two return addresses are passed to bpf trampoline: the return address for the traced function/prog, stored in x9, and the return address for the bpf trampoline itself, stored in lr. To obtain a full and accurate call stack, the bpf trampoline constructs two fake function frames using x9 and lr. However, struct_ops progs are invoked directly as function callbacks, meaning that x9 is not set as it is in the fentry callsite. In this case, the frame constructed using x9 is garbage. The following stack trace for struct_ops, captured by perf sampling, illustrates this issue, where tcp_ack+0x404 is a garbage frame: ffffffc0801a04b4 bpf_prog_50992e55a0f655a9_bpf_cubic_cong_avoid+0x98 (bpf_prog_50992e55a0f655a9_bpf_cubic_cong_avoid) ffffffc0801a228c [unknown] ([kernel.kallsyms]) // bpf trampoline ffffffd08d362590 tcp_ack+0x798 ([kernel.kallsyms]) // caller for bpf trampoline ffffffd08d3621fc tcp_ack+0x404 ([kernel.kallsyms]) // garbage frame ffffffd08d36452c tcp_rcv_established+0x4ac ([kernel.kallsyms]) ffffffd08d375c58 tcp_v4_do_rcv+0x1f0 ([kernel.kallsyms]) ffffffd08d378630 tcp_v4_rcv+0xeb8 ([kernel.kallsyms]) To fix it, construct only one frame using lr for struct_ops. The above stack trace also indicates that there is no kernel symbol for struct_ops bpf trampoline. This will be addressed in a follow-up patch. Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Signed-off-by: Xu Kuohai <xukuohai@xxxxxxxxxx> Acked-by: Puranjay Mohan <puranjay@xxxxxxxxxx> Tested-by: Puranjay Mohan <puranjay@xxxxxxxxxx> Link: https://lore.kernel.org/r/20241025085220.533949-1-xukuohai@xxxxxxxxxxxxxxx Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index afb79209d4132..c04ace8f48435 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1757,6 +1757,12 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) } } +static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links) +{ + return fentry_links->nr_links == 1 && + fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS; +} + /* Based on the x86's implementation of arch_prepare_bpf_trampoline(). * * bpf prog and function entry before bpf trampoline hooked: @@ -1786,6 +1792,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; bool save_ret; __le32 **branches = NULL; + bool is_struct_ops = is_struct_ops_tramp(fentry); /* trampoline stack layout: * [ parent ip ] @@ -1854,11 +1861,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, */ emit_bti(A64_BTI_JC, ctx); - /* frame for parent function */ - emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); + /* x9 is not set for struct_ops */ + if (!is_struct_ops) { + /* frame for parent function */ + emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + } - /* frame for patched function */ + /* frame for patched function for tracing, or caller for struct_ops */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); @@ -1944,19 +1954,24 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, /* reset SP */ emit(A64_MOV(1, A64_SP, A64_FP), ctx); - /* pop frames */ - emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); - - if (flags & BPF_TRAMP_F_SKIP_FRAME) { - /* skip patched function, return to parent */ - emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); - emit(A64_RET(A64_R(9)), ctx); + if (is_struct_ops) { + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_RET(A64_LR), ctx); } else { - /* return to patched function */ - emit(A64_MOV(1, A64_R(10), A64_LR), ctx); - emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); - emit(A64_RET(A64_R(10)), ctx); + /* pop frames */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* skip patched function, return to parent */ + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(9)), ctx); + } else { + /* return to patched function */ + emit(A64_MOV(1, A64_R(10), A64_LR), ctx); + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(10)), ctx); + } } if (ctx->image)