Re: [PATCH -tip 0/5] kprobes: Fix stacktrace in kretprobes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Josh and Daniel,

On Wed, 10 Mar 2021 09:08:45 -0600
Josh Poimboeuf <jpoimboe@xxxxxxxxxx> wrote:

> On Wed, Mar 10, 2021 at 06:57:34PM +0900, Masami Hiramatsu wrote:
> > > If I understand correctly, for #1 you need an unwind hint which treats
> > > the instruction *after* the "pushq %rsp" as the beginning of the
> > > function.
> > 
> > Thanks for the patch. In that case, should I still change the stack allocation?
> > Or can I continue to use a series of "push/pop" ?
> 
> You can continue to use push/pop.  Objtool is only getting confused by
> the unbalanced stack of the function (more pushes than pops).  The
> unwind hint should fix that.

With your patch, I made a fix for the ORC unwinder. I confirmed that it works
with two kretprobes on the same call path, like this:

cd /sys/kernel/debug/tracing/
echo r vfs_read >> kprobe_events
echo r full_proxy_read >> kprobe_events
echo traceoff:1 > events/kprobes/r_vfs_read_0/trigger
echo stacktrace:1 > events/kprobes/r_full_proxy_read_0/trigger
echo 1 > events/kprobes/enable
echo 1 > options/sym-offset
cat /sys/kernel/debug/kprobes/list
echo 0 > events/kprobes/enable
cat trace

# tracer: nop
#
# entries-in-buffer/entries-written: 3/3   #P:8
#
#                                _-----=> irqs-off
#                               / _----=> need-resched
#                              | / _---=> hardirq/softirq
#                              || / _--=> preempt-depth
#                              ||| /     delay
#           TASK-PID     CPU#  ||||   TIMESTAMP  FUNCTION
#              | |         |   ||||      |         |
           <...>-136     [004] ...1   648.481281: r_full_proxy_read_0: (vfs_read+0xab/0x1a0 <- full_proxy_read)
           <...>-136     [004] ...1   648.481310: <stack trace>
 => kretprobe_trace_func+0x209/0x2f0
 => kretprobe_dispatcher+0x4a/0x70
 => __kretprobe_trampoline_handler+0xcd/0x170
 => trampoline_handler+0x3d/0x50
 => kretprobe_trampoline+0x25/0x50
 => vfs_read+0xab/0x1a0
 => ksys_read+0x5f/0xe0
 => do_syscall_64+0x33/0x40
 => entry_SYSCALL_64_after_hwframe+0x44/0xae
 => 0
 => 0
 => 0
 => 0
 => 0
 => 0
 => 0

I haven't tested it with bpftrace, but this also handles the
regs->ip == kretprobe_trampoline case, so it should work.

commit aa452d999b524b1851f69cc947be3e1a2f3ca1ec
Author: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Date:   Sat Mar 6 08:34:51 2021 +0900

    x86/unwind/orc: Fixup kretprobe trampoline entry
    
    Since the kretprobe replaces the function return address with
    the kretprobe_trampoline on the stack, the ORC unwinder cannot
    continue the stack unwinding at that point.
    
    To fix this issue, correct state->ip in unwind_next_frame(), in
    the same way as is done for the function-graph tracer.
    
    Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 70fc159ebe69..ab5e45b848d5 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -4,6 +4,7 @@
 
 #include <linux/sched.h>
 #include <linux/ftrace.h>
+#include <linux/llist.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
@@ -20,6 +21,9 @@ struct unwind_state {
 	bool signal, full_regs;
 	unsigned long sp, bp, ip;
 	struct pt_regs *regs, *prev_regs;
+#if defined(CONFIG_KRETPROBES)
+	struct llist_node *kr_iter;
+#endif
 #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
 	bool got_irq;
 	unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 2a1d47f47eee..94869516cfc0 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -2,6 +2,7 @@
 #include <linux/objtool.h>
 #include <linux/module.h>
 #include <linux/sort.h>
+#include <linux/kprobes.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
@@ -414,6 +415,30 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off,
 	return false;
 }
 
+#ifdef CONFIG_KRETPROBES
+static unsigned long orc_kretprobe_correct_ip(struct unwind_state *state)
+{
+	return kretprobe_find_ret_addr(
+			(unsigned long)kretprobe_trampoline_addr(),
+			state->task, &state->kr_iter);
+}
+
+static bool is_kretprobe_trampoline_address(unsigned long ip)
+{
+	return ip == (unsigned long)kretprobe_trampoline_addr();
+}
+#else
+static unsigned long orc_kretprobe_correct_ip(struct unwind_state *state)
+{
+	return state->ip;
+}
+
+static bool is_kretprobe_trampoline_address(unsigned long ip)
+{
+	return false;
+}
+#endif
+
 bool unwind_next_frame(struct unwind_state *state)
 {
 	unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
@@ -536,6 +561,18 @@ bool unwind_next_frame(struct unwind_state *state)
 
 		state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
 						  state->ip, (void *)ip_p);
+		/*
+		 * There are special cases when the stack unwinder is called
+		 * from the kretprobe handler or the interrupt handler which
+		 * occurs in the kretprobe trampoline code. In those cases,
+		 * %sp is shown on the stack instead of the return address.
+		 * Or, the unwinder may find that the return address has been
+		 * replaced by kretprobe_trampoline.
+		 * In those cases, the correct address can be found in kretprobe.
+		 */
+		if (state->ip == sp ||
+		    is_kretprobe_trampoline_address(state->ip))
+			state->ip = orc_kretprobe_correct_ip(state);
 
 		state->sp = sp;
 		state->regs = NULL;
@@ -649,6 +686,12 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		state->full_regs = true;
 		state->signal = true;
 
+		/*
+		 * When the unwinder is called with regs from the kretprobe
+		 * handler, regs->ip is the kretprobe_trampoline address.
+		 */
+		if (is_kretprobe_trampoline_address(state->ip))
+			state->ip = orc_kretprobe_correct_ip(state);
 	} else if (task == current) {
 		asm volatile("lea (%%rip), %0\n\t"
 			     "mov %%rsp, %1\n\t"
-- 
Masami Hiramatsu <mhiramat@xxxxxxxxxx>



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux