On Fri, Oct 22, 2021 at 05:09:35PM +0200, Peter Zijlstra wrote:
> Recent patches to get_wchan() made it more robust by only doing the
> unwind when the task was blocked and serialized against wakeups.
> 
> Extract this functionality as a simpler companion to task_call_func()
> named task_try_func() that really only cares about blocked tasks. Then
> employ this new function to implement the same robustness for
> ARCH_STACKWALK based stack_trace_save_tsk().
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
>  include/linux/wait.h |  1 
>  kernel/sched/core.c  | 62 ++++++++++++++++++++++++++++++++++++++++++++-------
>  kernel/stacktrace.c  | 13 ++++++----
>  3 files changed, 63 insertions(+), 13 deletions(-)
> 
> --- a/include/linux/wait.h
> +++ b/include/linux/wait.h
> @@ -1162,5 +1162,6 @@ int autoremove_wake_function(struct wait
>  
>  typedef int (*task_call_f)(struct task_struct *p, void *arg);
>  extern int task_call_func(struct task_struct *p, task_call_f func, void *arg);
> +extern int task_try_func(struct task_struct *p, task_call_f func, void *arg);
>  
>  #endif /* _LINUX_WAIT_H */
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1966,21 +1966,21 @@ bool sched_task_on_rq(struct task_struct
>  	return task_on_rq_queued(p);
>  }
>  
> +static int try_get_wchan(struct task_struct *p, void *arg)
> +{
> +	unsigned long *wchan = arg;
> +	*wchan = __get_wchan(p);
> +	return 0;
> +}
> +
>  unsigned long get_wchan(struct task_struct *p)
>  {
>  	unsigned long ip = 0;
> -	unsigned int state;
>  
>  	if (!p || p == current)
>  		return 0;
>  
> -	/* Only get wchan if task is blocked and we can keep it that way. */
> -	raw_spin_lock_irq(&p->pi_lock);
> -	state = READ_ONCE(p->__state);
> -	smp_rmb(); /* see try_to_wake_up() */
> -	if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq)
> -		ip = __get_wchan(p);
> -	raw_spin_unlock_irq(&p->pi_lock);
> +	task_try_func(p, try_get_wchan, &ip);
>  
>  	return ip;
>  }
> @@ -4184,6 +4184,52 @@ int task_call_func(struct task_struct *p
>  	return ret;
>  }
>  
> +/*
> + * task_try_func - Invoke a function on task in blocked state
> + * @p: Process for which the function is to be invoked
> + * @func: Function to invoke
> + * @arg: Argument to function
> + *
> + * Fix the task in a blocked state, when possible. And if so, invoke @func on it.
> + *
> + * Returns:
> + *  -EBUSY or whatever @func returns
> + */
> +int task_try_func(struct task_struct *p, task_call_f func, void *arg)
> +{
> +	unsigned long flags;
> +	unsigned int state;
> +	int ret = -EBUSY;
> +
> +	raw_spin_lock_irqsave(&p->pi_lock, flags);
> +
> +	state = READ_ONCE(p->__state);
> +
> +	/*
> +	 * Ensure we load p->on_rq after p->__state, otherwise it would be
> +	 * possible to, falsely, observe p->on_rq == 0.
> +	 *
> +	 * See try_to_wake_up() for a longer comment.
> +	 */
> +	smp_rmb();
> +
> +	/*
> +	 * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
> +	 * the task is blocked. Make sure to check @state since ttwu() can drop
> +	 * locks at the end, see ttwu_queue_wakelist().
> +	 */
> +	if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq) {
> +		/*
> +		 * The task is blocked and we're holding off wakeups. For any
> +		 * of the other task states, see task_call_func().
> +		 */
> +		ret = func(p, arg);
> +	}
> +
> +	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +	return ret;
> +}
> +
>  /**
>   * wake_up_process - Wake up a specific process
>   * @p: The process to be woken up.
> --- a/kernel/stacktrace.c
> +++ b/kernel/stacktrace.c
> @@ -123,6 +123,13 @@ unsigned int stack_trace_save(unsigned l
>  }
>  EXPORT_SYMBOL_GPL(stack_trace_save);
>  
> +static int try_arch_stack_walk_tsk(struct task_struct *tsk, void *arg)
> +{
> +	stack_trace_consume_fn consume_entry = stack_trace_consume_entry_nosched;
> +	arch_stack_walk(consume_entry, arg, tsk, NULL);
> +	return 0;
> +}
> +
>  /**
>   * stack_trace_save_tsk - Save a task stack trace into a storage array
>   * @task: The task to examine
> @@ -135,7 +142,6 @@ EXPORT_SYMBOL_GPL(stack_trace_save);
>  unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
>  				  unsigned int size, unsigned int skipnr)
>  {
> -	stack_trace_consume_fn consume_entry = stack_trace_consume_entry_nosched;
>  	struct stacktrace_cookie c = {
>  		.store	= store,
>  		.size	= size,
> @@ -143,11 +149,8 @@ unsigned int stack_trace_save_tsk(struct
>  		.skip	= skipnr + (current == tsk),
>  	};
>  
> -	if (!try_get_task_stack(tsk))
> -		return 0;
> +	task_try_func(tsk, try_arch_stack_walk_tsk, &c);

Pardon my thin understanding of the scheduler, but I assume this change
doesn't mean stack_trace_save_tsk() stops working for "current", right?
In trying to answer this for myself, I couldn't convince myself what
value current->__state would have here. Is it one of
TASK_(UN)INTERRUPTIBLE?

Assuming this does actually remain callable for current:

Reviewed-by: Kees Cook <keescook@xxxxxxxxxxxx>

> 
> -	arch_stack_walk(consume_entry, &c, tsk, NULL);
> -	put_task_stack(tsk);
>  	return c.len;
>  }
> 
> 

-- 
Kees Cook
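
P.S. To make the question above concrete, here is how I'm reading the new
code path for tsk == current. This is an untested sketch -- the wrapper
function name is made up and the conclusion is only my assumption, so I
may well be missing something:

	#include <linux/kernel.h>
	#include <linux/sched.h>
	#include <linux/stacktrace.h>

	/* Hypothetical caller, only to illustrate the question. */
	static void sketch_trace_current(void)
	{
		unsigned long entries[8];
		unsigned int nr;

		/*
		 * current->__state is TASK_RUNNING (and ->on_rq is set), so
		 * task_try_func() would return -EBUSY without ever invoking
		 * try_arch_stack_walk_tsk(), leaving c.len at 0 ...
		 */
		nr = stack_trace_save_tsk(current, entries, ARRAY_SIZE(entries), 0);

		/* ... which would make nr == 0 here, if I'm reading it right. */
		pr_info("stack_trace_save_tsk(current) returned %u entries\n", nr);
	}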