Patch "perf,x86: avoid missing caller address in stack traces captured in uprobe" has been added to the 6.10-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    perf,x86: avoid missing caller address in stack traces captured in uprobe

to the 6.10-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     perf-x86-avoid-missing-caller-address-in-stack-trace.patch
and it can be found in the queue-6.10 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit a8b6632c02c45eb65a8fa2aab392b485c373c415
Author: Andrii Nakryiko <andrii@xxxxxxxxxx>
Date:   Mon Jul 29 10:52:23 2024 -0700

    perf,x86: avoid missing caller address in stack traces captured in uprobe
    
    [ Upstream commit cfa7f3d2c526c224a6271cc78a4a27a0de06f4f0 ]
    
    When tracing user functions with uprobe functionality, it's common to
    install the probe (e.g., a BPF program) at the first instruction of the
    function. This is often going to be `push %rbp` instruction in function
    preamble, which means that within that function frame pointer hasn't
    been established yet. This leads to consistently missing an actual
    caller of the traced function, because perf_callchain_user() only
    records current IP (capturing traced function) and then following frame
    pointer chain (which would be caller's frame, containing the address of
    caller's caller).
    
    So when we have target_1 -> target_2 -> target_3 call chain and we are
    tracing an entry to target_3, captured stack trace will report
    target_1 -> target_3 call chain, which is wrong and confusing.
    
    This patch proposes a x86-64-specific heuristic to detect `push %rbp`
    (`push %ebp` on 32-bit architecture) instruction being traced. Given
    entire kernel implementation of user space stack trace capturing works
    under assumption that user space code was compiled with frame pointer
    register (%rbp/%ebp) preservation, it seems pretty reasonable to use
    this instruction as a strong indicator that this is the entry to the
    function. In that case, return address is still pointed to by %rsp/%esp,
    so we fetch it and add to stack trace before proceeding to unwind the
    rest using frame pointer-based logic.
    
    We also check for `endbr64` (for 64-bit modes) as another common pattern
    for function entry, as suggested by Josh Poimboeuf. Even if we get this
    wrong sometimes for uprobes attached not at the function entry, it's OK
    because stack trace will still be overall meaningful, just with one
    extra bogus entry. If we don't detect this, we end up with guaranteed to
    be missing caller function entry in the stack trace, which is worse
    overall.
    
    Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
    Link: https://lkml.kernel.org/r/20240729175223.23914-1-andrii@xxxxxxxxxx
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 83d12dd3f831a..d77a97056844b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -41,6 +41,8 @@
 #include <asm/desc.h>
 #include <asm/ldt.h>
 #include <asm/unwind.h>
+#include <asm/uprobes.h>
+#include <asm/ibt.h>
 
 #include "perf_event.h"
 
@@ -2814,6 +2816,46 @@ static unsigned long get_segment_base(unsigned int segment)
 	return get_desc_base(desc);
 }
 
+#ifdef CONFIG_UPROBES
+/*
+ * Heuristic-based check if uprobe is installed at the function entry.
+ *
+ * Under assumption of user code being compiled with frame pointers,
+ * `push %rbp/%ebp` is a good indicator that we indeed are.
+ *
+ * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
+ * If we get this wrong, captured stack trace might have one extra bogus
+ * entry, but the rest of stack trace will still be meaningful.
+ */
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	struct arch_uprobe *auprobe;
+
+	if (!current->utask)
+		return false;
+
+	auprobe = current->utask->auprobe;
+	if (!auprobe)
+		return false;
+
+	/* push %rbp/%ebp */
+	if (auprobe->insn[0] == 0x55)
+		return true;
+
+	/* endbr64 (64-bit only) */
+	if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn))
+		return true;
+
+	return false;
+}
+
+#else
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_UPROBES */
+
 #ifdef CONFIG_IA32_EMULATION
 
 #include <linux/compat.h>
@@ -2825,6 +2867,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
 	unsigned long ss_base, cs_base;
 	struct stack_frame_ia32 frame;
 	const struct stack_frame_ia32 __user *fp;
+	u32 ret_addr;
 
 	if (user_64bit_mode(regs))
 		return 0;
@@ -2834,6 +2877,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
 
 	fp = compat_ptr(ss_base + regs->bp);
 	pagefault_disable();
+
+	/* see perf_callchain_user() below for why we do this */
+	if (is_uprobe_at_func_entry(regs) &&
+	    !get_user(ret_addr, (const u32 __user *)regs->sp))
+		perf_callchain_store(entry, ret_addr);
+
 	while (entry->nr < entry->max_stack) {
 		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
@@ -2862,6 +2911,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 {
 	struct stack_frame frame;
 	const struct stack_frame __user *fp;
+	unsigned long ret_addr;
 
 	if (perf_guest_state()) {
 		/* TODO: We don't support guest os callchain now */
@@ -2885,6 +2935,19 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 		return;
 
 	pagefault_disable();
+
+	/*
+	 * If we are called from uprobe handler, and we are indeed at the very
+	 * entry to user function (which is normally a `push %rbp` instruction,
+	 * under assumption of application being compiled with frame pointers),
+	 * we should read return address from *regs->sp before proceeding
+	 * to follow frame pointers, otherwise we'll skip immediate caller
+	 * as %rbp is not yet setup.
+	 */
+	if (is_uprobe_at_func_entry(regs) &&
+	    !get_user(ret_addr, (const unsigned long __user *)regs->sp))
+		perf_callchain_store(entry, ret_addr);
+
 	while (entry->nr < entry->max_stack) {
 		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f46e0ca0169c7..d91e32aff5a13 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -76,6 +76,8 @@ struct uprobe_task {
 	struct uprobe			*active_uprobe;
 	unsigned long			xol_vaddr;
 
+	struct arch_uprobe              *auprobe;
+
 	struct return_instance		*return_instances;
 	unsigned int			depth;
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 47cdec3e1df11..28c678c8daef3 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -2071,6 +2071,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 	bool need_prep = false; /* prepare return uprobe, when needed */
 
 	down_read(&uprobe->register_rwsem);
+	current->utask->auprobe = &uprobe->arch;
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
 		int rc = 0;
 
@@ -2085,6 +2086,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 
 		remove &= rc;
 	}
+	current->utask->auprobe = NULL;
 
 	if (need_prep && !remove)
 		prepare_uretprobe(uprobe, regs); /* put bp at return */




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux