On Wed, 3 Mar 2021 09:26:04 -0500 Steven Rostedt <rostedt@xxxxxxxxxxx> wrote: > On Wed, 3 Mar 2021 13:48:28 +0900 > Masami Hiramatsu <mhiramat@xxxxxxxxxx> wrote: > > > > > > > > > I think (can't prove) this used to work: > > Would be good to find out if it did. > > > > > I'm not sure the bpftrace had correctly handled it or not. > > > > > > > > # bpftrace -e 'kretprobe:__tcp_retransmit_skb { @[kstack()] = count() }' > > > Attaching 1 probe... > > > ^C > > > > > > @[ > > > kretprobe_trampoline+0 > > > ]: 1 > > > > Would you know how the bpftrace stacktracer rewinds the stack entries? > > FYI, ftrace does it in trace_seq_print_sym()@kernel/trace/trace_output.c > > > > The difference between trace events and normal function tracing stack > traces is that it keeps its original return address. But kretprobes (and > function graph tracing, and some bpf trampolines too) modify the return > pointer, and that could possibly cause havoc with the stack trace. BTW, I think if the stack tracer passes the nth of kretprobe_trampoline or a cursor, kretprobe can find the correct return address from given task. I've made a patch to do that only for the CONFIG_ARCH_STACKWALK=y Here is an example on x86. # echo r vfs_read > kprobe_events # echo stacktrace > events/kprobes/r_vfs_read_0/trigger # echo 1 > events/kprobes/r_vfs_read_0/enable # echo 1 > options/sym-offset # less trace ... sh-132 [007] ...1 22.524917: <stack trace> => kretprobe_dispatcher+0x7d/0xc0 => __kretprobe_trampoline_handler+0xdb/0x1b0 => trampoline_handler+0x48/0x60 => kretprobe_trampoline+0x2a/0x50 => ksys_read+0x70/0xf0 => __x64_sys_read+0x1a/0x20 => do_syscall_64+0x38/0x50 => entry_SYSCALL_64_after_hwframe+0x44/0xae => 0 => 0 ------ >From 77a785a3a0791171b570830d0b2f099f8a4ba337 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <mhiramat@xxxxxxxxxx> Date: Thu, 4 Mar 2021 14:19:24 +0900 Subject: [PATCH] kprobes: stacktrace: Recover the address changed by kretprobe Recover the return address on the stack which changed by the kretprobe. Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx> --- include/linux/kprobes.h | 3 ++ kernel/kprobes.c | 81 +++++++++++++++++++++++++++-------------- kernel/stacktrace.c | 26 +++++++++++++ 3 files changed, 82 insertions(+), 28 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1883a4a9f16a..a022e507d829 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -205,6 +205,9 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); +unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk, + unsigned long addr, + struct llist_node **cur); /* If the trampoline handler called from a kprobe, use this version */ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, void *trampoline_address, diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 745f08fdd7a6..b3d9dbd6086f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1863,46 +1863,56 @@ unsigned long __weak arch_deref_entry_point(void *entry) #ifdef CONFIG_KRETPROBES -unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, - void *frame_pointer) +/* This assumes the tsk is current or the task which is not running. */ +unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk, + unsigned long addr, + struct llist_node **cur) { - kprobe_opcode_t *correct_ret_addr = NULL; struct kretprobe_instance *ri = NULL; - struct llist_node *first, *node; - struct kretprobe *rp; + struct llist_node *node = *cur; - /* Find all nodes for this frame. */ - first = node = current->kretprobe_instances.first; - while (node) { - ri = container_of(node, struct kretprobe_instance, llist); + if (addr != (unsigned long)&kretprobe_trampoline) + return addr; - BUG_ON(ri->fp != frame_pointer); + if (!node) + node = tsk->kretprobe_instances.first; + else + node = node->next; - if (ri->ret_addr != trampoline_address) { - correct_ret_addr = ri->ret_addr; - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - goto found; + while (node) { + ri = container_of(node, struct kretprobe_instance, llist); + if (ri->ret_addr != (void *)&kretprobe_trampoline) { + *cur = node; + return (unsigned long)ri->ret_addr; } - node = node->next; } - pr_err("Oops! Kretprobe fails to find correct return address.\n"); - BUG_ON(1); + return 0; +} -found: - /* Unlink all nodes for this frame. */ - current->kretprobe_instances.first = node->next; - node->next = NULL; +unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, + void *trampoline_address, + void *frame_pointer) +{ + kprobe_opcode_t *correct_ret_addr = NULL; + struct kretprobe_instance *ri = NULL; + struct llist_node *first, *node = NULL; + struct kretprobe *rp; - /* Run them.. */ + /* Find correct address and all nodes for this frame. */ + correct_ret_addr = (void*)kretprobe_real_stack_tsk(current, + (unsigned long)&kretprobe_trampoline, &node); + if (!correct_ret_addr) { + pr_err("Oops! Kretprobe fails to find correct return address.\n"); + BUG_ON(1); + } + + /* Run them. */ + first = current->kretprobe_instances.first; while (first) { ri = container_of(first, struct kretprobe_instance, llist); - first = first->next; + + BUG_ON(ri->fp != frame_pointer); rp = get_kretprobe(ri); if (rp && rp->handler) { @@ -1913,6 +1923,21 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, rp->handler(ri, regs); __this_cpu_write(current_kprobe, prev); } + if (first == node) + break; + + first = first->next; + } + + /* Unlink all nodes for this frame. */ + first = current->kretprobe_instances.first; + current->kretprobe_instances.first = node->next; + node->next = NULL; + + /* Recycle them. */ + while (first) { + ri = container_of(first, struct kretprobe_instance, llist); + first = first->next; recycle_rp_inst(ri); } diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 9f8117c7cfdd..416f357e64b8 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/kallsyms.h> #include <linux/stacktrace.h> +#include <linux/kprobes.h> /** * stack_trace_print - Print the entries in the stack trace @@ -76,6 +77,10 @@ struct stacktrace_cookie { unsigned int size; unsigned int skip; unsigned int len; +#ifdef CONFIG_KRETPROBES + struct llist_node *cur; + struct task_struct *tsk; +#endif }; static bool stack_trace_consume_entry(void *cookie, unsigned long addr) @@ -89,6 +94,7 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr) c->skip--; return true; } + addr = kretprobe_real_stack_tsk(c->tsk, addr, &c->cur); c->store[c->len++] = addr; return c->len < c->size; } @@ -116,6 +122,10 @@ unsigned int stack_trace_save(unsigned long *store, unsigned int size, .store = store, .size = size, .skip = skipnr + 1, +#ifdef CONFIG_KRETPROBES + .cur = NULL, + .tsk = current, +#endif }; arch_stack_walk(consume_entry, &c, current, NULL); @@ -141,6 +151,10 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store, .size = size, /* skip this function if they are tracing us */ .skip = skipnr + (current == tsk), +#ifdef CONFIG_KRETPROBES + .cur = NULL, + .tsk = tsk, +#endif }; if (!try_get_task_stack(tsk)) @@ -168,6 +182,10 @@ unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, .store = store, .size = size, .skip = skipnr, +#ifdef CONFIG_KRETPROBES + .cur = NULL, + .tsk = current, +#endif }; arch_stack_walk(consume_entry, &c, current, regs); @@ -194,6 +212,10 @@ int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store, struct stacktrace_cookie c = { .store = store, .size = size, +#ifdef CONFIG_KRETPROBES + .cur = NULL, + .tsk = tsk, +#endif }; int ret; @@ -224,6 +246,10 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) struct stacktrace_cookie c = { .store = store, .size = size, +#ifdef CONFIG_KRETPROBES + .cur = NULL, + .tsk = current, +#endif }; mm_segment_t fs; -- 2.25.1 Thank you, -- Masami Hiramatsu <mhiramat@xxxxxxxxxx>