On Tue, Nov 30, 2021 at 11:13:28AM -0600, Madhavan T. Venkataraman wrote:
> On 11/30/21 9:05 AM, Mark Rutland wrote:
> > On Tue, Nov 23, 2021 at 01:37:19PM -0600, madvenka@xxxxxxxxxxxxxxxxxxx wrote:
> >> From: "Madhavan T. Venkataraman" <madvenka@xxxxxxxxxxxxxxxxxxx>
> >> 
> >> Currently, arch_stack_walk() calls start_backtrace() and
> >> walk_stackframe() separately. There is no need to do that. Instead,
> >> call start_backtrace() from within walk_stackframe(). In other words,
> >> walk_stackframe() is the only unwind function a consumer needs to call.
> >> @@ -143,15 +140,19 @@ static int notrace unwind_frame(struct task_struct *tsk,
> >>  NOKPROBE_SYMBOL(unwind_frame);
> >>  
> >>  static void notrace walk_stackframe(struct task_struct *tsk,
> >> -                                    struct stackframe *frame,
> >> +                                    unsigned long fp, unsigned long pc,
> >>                                      bool (*fn)(void *, unsigned long), void *data)
> >>  {
> >> +        struct stackframe frame;
> >> +
> >> +        start_backtrace(&frame, fp, pc);
> >> +
> >>          while (1) {
> >>                  int ret;
> >>  
> >> -                if (!fn(data, frame->pc))
> >> +                if (!fn(data, frame.pc))
> >>                          break;
> >> -                ret = unwind_frame(tsk, frame);
> >> +                ret = unwind_frame(tsk, &frame);
> >>                  if (ret < 0)
> >>                          break;
> >>          }
> >> @@ -195,17 +196,19 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> >>                               void *cookie, struct task_struct *task,
> >>                               struct pt_regs *regs)
> >>  {
> >> -        struct stackframe frame;
> >> -
> >> -        if (regs)
> >> -                start_backtrace(&frame, regs->regs[29], regs->pc);
> >> -        else if (task == current)
> >> -                start_backtrace(&frame,
> >> -                                (unsigned long)__builtin_frame_address(1),
> >> -                                (unsigned long)__builtin_return_address(0));
> >> -        else
> >> -                start_backtrace(&frame, thread_saved_fp(task),
> >> -                                thread_saved_pc(task));
> >> -
> >> -        walk_stackframe(task, &frame, consume_entry, cookie);
> >> +        unsigned long fp, pc;
> >> +
> >> +        if (regs) {
> >> +                fp = regs->regs[29];
> >> +                pc = regs->pc;
> >> +        } else if (task == current) {
> >> +                /* Skip arch_stack_walk() in the stack trace. */
> >> +                fp = (unsigned long)__builtin_frame_address(1);
> >> +                pc = (unsigned long)__builtin_return_address(0);
> >> +        } else {
> >> +                /* Caller guarantees that the task is not running. */
> >> +                fp = thread_saved_fp(task);
> >> +                pc = thread_saved_pc(task);
> >> +        }
> >> +        walk_stackframe(task, fp, pc, consume_entry, cookie);
> > 
> > I'd prefer to leave this as-is. The new and old structure are largely
> > equivalent, so we haven't made this any simpler, but we have added more
> > arguments to walk_stackframe().
> > 
> This is just to simplify things when we eventually add
> arch_stack_walk_reliable(). That is all. All of the unwinding is done by
> a single unwinding function and there are two consumers of that unwinding
> function - arch_stack_walk() and arch_stack_walk_reliable().

I understand the theory, but I don't think that moving the
start_backtrace() call actually simplifies this in a meaningful way, and I
think it'll make it harder for us to make more meaningful simplifications
later on.

As of patch 4 of this series, we'll have:

| noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
|                                       void *cookie, struct task_struct *task,
|                                       struct pt_regs *regs)
| {
|         unsigned long fp, pc;
|
|         if (regs) {
|                 fp = regs->regs[29];
|                 pc = regs->pc;
|         } else if (task == current) {
|                 /* Skip arch_stack_walk() in the stack trace. */
|                 fp = (unsigned long)__builtin_frame_address(1);
|                 pc = (unsigned long)__builtin_return_address(0);
|         } else {
|                 /* Caller guarantees that the task is not running. */
|                 fp = thread_saved_fp(task);
|                 pc = thread_saved_pc(task);
|         }
|         walk_stackframe(task, fp, pc, consume_entry, cookie);
| }
|
| noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
|                                               void *cookie,
|                                               struct task_struct *task)
| {
|         unsigned long fp, pc;
|
|         if (task == current) {
|                 /* Skip arch_stack_walk_reliable() in the stack trace. */
|                 fp = (unsigned long)__builtin_frame_address(1);
|                 pc = (unsigned long)__builtin_return_address(0);
|         } else {
|                 /* Caller guarantees that the task is not running. */
|                 fp = thread_saved_fp(task);
|                 pc = thread_saved_pc(task);
|         }
|         if (unwind(task, fp, pc, consume_fn, cookie))
|                 return 0;
|         return -EINVAL;
| }

Which I do not think is substantially simpler than the naive extrapolation
from what we currently have, e.g.

| noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
|                                       void *cookie, struct task_struct *task,
|                                       struct pt_regs *regs)
| {
|         struct stackframe frame;
|
|         if (regs) {
|                 unwind_init(&frame, regs->regs[29], regs->pc);
|         } else if (task == current) {
|                 unwind_init(&frame,
|                             (unsigned long)__builtin_frame_address(1),
|                             (unsigned long)__builtin_return_address(0));
|         } else {
|                 unwind_init(&frame, thread_saved_fp(task),
|                             thread_saved_pc(task));
|         }
|         walk_stackframe(task, &frame, consume_entry, cookie);
| }
|
| noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
|                                               void *cookie,
|                                               struct task_struct *task)
| {
|         struct stackframe frame;
|
|         if (task == current) {
|                 unwind_init(&frame,
|                             (unsigned long)__builtin_frame_address(1),
|                             (unsigned long)__builtin_return_address(0));
|         } else {
|                 unwind_init(&frame, thread_saved_fp(task),
|                             thread_saved_pc(task));
|         }
|         if (unwind(task, &frame, consume_fn, cookie))
|                 return 0;
|         return -EINVAL;
| }
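For reference, both sketches above lean on unwind_init() and unwind()
without spelling them out. A minimal reading (assuming these are essentially
start_backtrace() and walk_stackframe() renamed, with some success/failure
return added for the reliable case) would be roughly:

| /*
|  * Essentially start_backtrace(); the stack-transition bookkeeping it also
|  * performs is elided from this sketch.
|  */
| static void unwind_init(struct stackframe *frame, unsigned long fp,
|                         unsigned long pc)
| {
|         frame->fp = fp;
|         frame->pc = pc;
| }
|
| /*
|  * Essentially walk_stackframe(), returning false if the consumer stopped
|  * the walk early. Whether the terminating unwind_frame() result means "hit
|  * the last frame record" or "something went wrong" is exactly the
|  * distinction the reliable variant would need to make; it is not made here.
|  */
| static bool notrace unwind(struct task_struct *tsk, struct stackframe *frame,
|                            bool (*fn)(void *, unsigned long), void *data)
| {
|         while (1) {
|                 int ret;
|
|                 if (!fn(data, frame->pc))
|                         return false;
|                 ret = unwind_frame(tsk, frame);
|                 if (ret < 0)
|                         break;
|         }
|         return true;
| }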
Further, I think we can factor this in a different way to reduce the
duplication:

| /*
|  * TODO: document requirements here
|  */
| static inline void unwind_init_from_current_regs(struct stackframe *frame,
|                                                  struct pt_regs *regs)
| {
|         unwind_init(frame, regs->regs[29], regs->pc);
| }
|
| /*
|  * TODO: document requirements here
|  */
| static inline void unwind_init_from_blocked_task(struct stackframe *frame,
|                                                  struct task_struct *tsk)
| {
|         unwind_init(frame, thread_saved_fp(tsk),
|                     thread_saved_pc(tsk));
| }
|
| /*
|  * TODO: document requirements here
|  *
|  * Note: this is always inlined, and we expect our caller to be a noinline
|  * function, such that this starts from our caller's caller.
|  */
| static __always_inline void unwind_init_from_caller(struct stackframe *frame)
| {
|         unwind_init(frame,
|                     (unsigned long)__builtin_frame_address(1),
|                     (unsigned long)__builtin_return_address(0));
| }
|
| noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
|                                       void *cookie, struct task_struct *task,
|                                       struct pt_regs *regs)
| {
|         struct stackframe frame;
|
|         if (regs)
|                 unwind_init_from_current_regs(&frame, regs);
|         else if (task == current)
|                 unwind_init_from_caller(&frame);
|         else
|                 unwind_init_from_blocked_task(&frame, task);
|
|         unwind(task, &frame, consume_entry, cookie);
| }
|
| noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
|                                               void *cookie,
|                                               struct task_struct *task)
| {
|         struct stackframe frame;
|
|         if (task == current)
|                 unwind_init_from_caller(&frame);
|         else
|                 unwind_init_from_blocked_task(&frame, task);
|
|         if (unwind(task, &frame, consume_fn, cookie))
|                 return 0;
|         return -EINVAL;
| }

... which minimizes the duplication and allows us to add specialized
initialization for each case if necessary, which I believe we will need in
future to make unwinding across exception boundaries (such as when starting
with regs) more useful.
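To make that last point concrete, a purely hypothetical illustration (the
'source' field and UNWIND_SOURCE_REGS value below are invented for this
sketch and are not part of struct stackframe or of this series): the
regs-specific initializer is a natural place to record that the trace starts
at an exception boundary, which the core unwinder could later consult when
stepping across that boundary or deciding whether a trace is reliable:

| static inline void unwind_init_from_current_regs(struct stackframe *frame,
|                                                  struct pt_regs *regs)
| {
|         unwind_init(frame, regs->regs[29], regs->pc);
|
|         /* Hypothetical extra state, not in the current struct stackframe. */
|         frame->source = UNWIND_SOURCE_REGS;
| }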
Thanks,
Mark.