On 11/30/21 12:29 PM, Mark Rutland wrote:
> On Tue, Nov 30, 2021 at 11:13:28AM -0600, Madhavan T. Venkataraman wrote:
>> On 11/30/21 9:05 AM, Mark Rutland wrote:
>>> On Tue, Nov 23, 2021 at 01:37:19PM -0600, madvenka@xxxxxxxxxxxxxxxxxxx wrote:
>>>> From: "Madhavan T. Venkataraman" <madvenka@xxxxxxxxxxxxxxxxxxx>
>>>>
>>>> Currently, arch_stack_walk() calls start_backtrace() and walk_stackframe()
>>>> separately. There is no need to do that. Instead, call start_backtrace()
>>>> from within walk_stackframe(). In other words, walk_stackframe() is the only
>>>> unwind function a consumer needs to call.
>
>>>> @@ -143,15 +140,19 @@ static int notrace unwind_frame(struct task_struct *tsk,
>>>>  NOKPROBE_SYMBOL(unwind_frame);
>>>>
>>>>  static void notrace walk_stackframe(struct task_struct *tsk,
>>>> -				    struct stackframe *frame,
>>>> +				    unsigned long fp, unsigned long pc,
>>>>  				    bool (*fn)(void *, unsigned long), void *data)
>>>>  {
>>>> +	struct stackframe frame;
>>>> +
>>>> +	start_backtrace(&frame, fp, pc);
>>>> +
>>>>  	while (1) {
>>>>  		int ret;
>>>>
>>>> -		if (!fn(data, frame->pc))
>>>> +		if (!fn(data, frame.pc))
>>>>  			break;
>>>> -		ret = unwind_frame(tsk, frame);
>>>> +		ret = unwind_frame(tsk, &frame);
>>>>  		if (ret < 0)
>>>>  			break;
>>>>  	}
>>>> @@ -195,17 +196,19 @@ noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
>>>>  			      void *cookie, struct task_struct *task,
>>>>  			      struct pt_regs *regs)
>>>>  {
>>>> -	struct stackframe frame;
>>>> -
>>>> -	if (regs)
>>>> -		start_backtrace(&frame, regs->regs[29], regs->pc);
>>>> -	else if (task == current)
>>>> -		start_backtrace(&frame,
>>>> -				(unsigned long)__builtin_frame_address(1),
>>>> -				(unsigned long)__builtin_return_address(0));
>>>> -	else
>>>> -		start_backtrace(&frame, thread_saved_fp(task),
>>>> -				thread_saved_pc(task));
>>>> -
>>>> -	walk_stackframe(task, &frame, consume_entry, cookie);
>>>> +	unsigned long fp, pc;
>>>> +
>>>> +	if (regs) {
>>>> +		fp = regs->regs[29];
>>>> +		pc = regs->pc;
>>>> +	} else if (task == current) {
>>>> +		/* Skip arch_stack_walk() in the stack trace. */
>>>> +		fp = (unsigned long)__builtin_frame_address(1);
>>>> +		pc = (unsigned long)__builtin_return_address(0);
>>>> +	} else {
>>>> +		/* Caller guarantees that the task is not running. */
>>>> +		fp = thread_saved_fp(task);
>>>> +		pc = thread_saved_pc(task);
>>>> +	}
>>>> +	walk_stackframe(task, fp, pc, consume_entry, cookie);
>>>
>>> I'd prefer to leave this as-is. The new and old structure are largely
>>> equivalent, so we haven't made this any simpler, but we have added more
>>> arguments to walk_stackframe().
>>>
>>
>> This is just to simplify things when we eventually add arch_stack_walk_reliable().
>> That is all. All of the unwinding is done by a single unwinding function and
>> there are two consumers of that unwinding function - arch_stack_walk() and
>> arch_stack_walk_reliable().
>
> I understand the theory, but I don't think that moving the start_backtrace()
> call actually simplifies this in a meaningful way, and I think it'll make it
> harder for us to make more meaningful simplifications later on.
>
> As of patch 4 of this series, we'll have:
>
> | noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> |                                       void *cookie, struct task_struct *task,
> |                                       struct pt_regs *regs)
> | {
> |         unsigned long fp, pc;
> |
> |         if (regs) {
> |                 fp = regs->regs[29];
> |                 pc = regs->pc;
> |         } else if (task == current) {
> |                 /* Skip arch_stack_walk() in the stack trace. */
> |                 fp = (unsigned long)__builtin_frame_address(1);
> |                 pc = (unsigned long)__builtin_return_address(0);
> |         } else {
> |                 /* Caller guarantees that the task is not running. */
> |                 fp = thread_saved_fp(task);
> |                 pc = thread_saved_pc(task);
> |         }
> |         walk_stackframe(task, fp, pc, consume_entry, cookie);
> | }
> |
> | noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
> |                                               void *cookie,
> |                                               struct task_struct *task)
> | {
> |         unsigned long fp, pc;
> |
> |         if (task == current) {
> |                 /* Skip arch_stack_walk_reliable() in the stack trace. */
> |                 fp = (unsigned long)__builtin_frame_address(1);
> |                 pc = (unsigned long)__builtin_return_address(0);
> |         } else {
> |                 /* Caller guarantees that the task is not running. */
> |                 fp = thread_saved_fp(task);
> |                 pc = thread_saved_pc(task);
> |         }
> |         if (unwind(task, fp, pc, consume_fn, cookie))
> |                 return 0;
> |         return -EINVAL;
> | }
>
> Which I do not think is substantially simpler than the naive extrapolation from
> what we currently have, e.g.
>
> | noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> |                                       void *cookie, struct task_struct *task,
> |                                       struct pt_regs *regs)
> | {
> |         struct stackframe frame;
> |
> |         if (regs) {
> |                 unwind_init(&frame, regs->regs[29], regs->pc);
> |         } else if (task == current) {
> |                 unwind_init(&frame, __builtin_frame_address(1),
> |                             __builtin_return_address(0));
> |         } else {
> |                 unwind_init(&frame, thread_saved_fp(task),
> |                             thread_saved_pc(task));
> |         }
> |         walk_stackframe(task, &frame, consume_entry, cookie);
> | }
> |
> | noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
> |                                               void *cookie,
> |                                               struct task_struct *task)
> | {
> |         struct stackframe frame;
> |
> |         if (task == current) {
> |                 unwind_init(&frame, __builtin_frame_address(1),
> |                             __builtin_return_address(0));
> |         } else {
> |                 unwind_init(&frame, thread_saved_fp(task),
> |                             thread_saved_pc(task));
> |         }
> |         if (unwind(task, &frame, consume_fn, cookie))
> |                 return 0;
> |         return -EINVAL;
> | }
>
> Further, I think we can factor this in a different way to reduce the
> duplication:
>
> | /*
> |  * TODO: document requirements here
> |  */
> | static inline void unwind_init_from_current_regs(struct stackframe *frame,
> |                                                  struct pt_regs *regs)
> | {
> |         unwind_init(frame, regs->regs[29], regs->pc);
> | }
> |
> | /*
> |  * TODO: document requirements here
> |  */
> | static inline void unwind_init_from_blocked_task(struct stackframe *frame,
> |                                                  struct task_struct *tsk)
> | {
> |         unwind_init(frame, thread_saved_fp(tsk),
> |                     thread_saved_pc(tsk));
> | }
> |
> | /*
> |  * TODO: document requirements here
> |  *
> |  * Note: this is always inlined, and we expect our caller to be a noinline
> |  * function, such that this starts from our caller's caller.
> |  */
> | static __always_inline void unwind_init_from_caller(struct stackframe *frame)
> | {
> |         unwind_init(frame, __builtin_frame_address(1),
> |                     __builtin_return_address(0));
> | }
> |
> | noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
> |                                       void *cookie, struct task_struct *task,
> |                                       struct pt_regs *regs)
> | {
> |         struct stackframe frame;
> |
> |         if (regs)
> |                 unwind_init_from_current_regs(&frame, regs);
> |         else if (task == current)
> |                 unwind_init_from_caller(&frame);
> |         else
> |                 unwind_init_from_blocked_task(&frame, task);
> |
> |         unwind(task, &frame, consume_entry, cookie);
> | }
> |
> | noinline int notrace arch_stack_walk_reliable(stack_trace_consume_fn consume_fn,
> |                                               void *cookie,
> |                                               struct task_struct *task)
> | {
> |         struct stackframe frame;
> |
> |         if (task == current)
> |                 unwind_init_from_caller(&frame);
> |         else
> |                 unwind_init_from_blocked_task(&frame, task);
> |
> |         if (unwind(task, &frame, consume_fn, cookie))
> |                 return 0;
> |         return -EINVAL;
> | }
>
> ... which minimizes the duplication and allows us to add specialized
> initialization for each case if necessary, which I believe we will need in
> future to make unwinding across exception boundaries (such as when starting
> with regs) more useful.
>
> Thanks,
> Mark.
>

OK. I don't mind doing it this way.

Thanks.

Madhavan
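
An aside on the "caller's caller" trick that unwind_init_from_caller()
relies on above: the sketch below is a minimal, self-contained user-space
illustration, not kernel code, and the sketch_* names are invented for
this example. Because the helper is always inlined into a noinline
caller, __builtin_return_address(0) and __builtin_frame_address(1)
describe the caller's caller, so the entry point skips itself in the
resulting trace. Build with frame pointers enabled
(-fno-omit-frame-pointer), since __builtin_frame_address() with a
nonzero level is only reliable in frame-pointer builds (GCC's
-Wframe-address warns about nonzero levels for the same reason).

| /* Minimal user-space sketch of the frame-skipping trick (GCC/Clang). */
| #include <stdio.h>
|
| static inline __attribute__((always_inline))
| void sketch_unwind_init_from_caller(void)
| {
|         /*
|          * This body is inlined into its noinline caller, so "level 0"
|          * below refers to that caller:
|          *   __builtin_return_address(0) = the caller's return address
|          *   __builtin_frame_address(1)  = the caller's caller's frame
|          * The trace therefore starts at the caller's caller, skipping
|          * the entry point itself.
|          */
|         printf("pc=%p fp=%p\n",
|                __builtin_return_address(0),
|                __builtin_frame_address(1));
| }
|
| static __attribute__((noinline)) void sketch_arch_stack_walk(void)
| {
|         sketch_unwind_init_from_caller();
| }
|
| int main(void)
| {
|         /* Prints a pc inside main() and main()'s frame address. */
|         sketch_arch_stack_walk();
|         return 0;
| }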