On 03/18, Christian Brauner wrote: > > @@ -746,8 +751,23 @@ static void exit_notify(struct task_struct *tsk, int group_dead) > * sub-thread or delay_group_leader(), wake up the > * PIDFD_THREAD waiters. > */ > - if (!thread_group_empty(tsk)) > - do_notify_pidfd(tsk); > + if (!thread_group_empty(tsk)) { > + if (delay_group_leader(tsk)) { > + struct pid *pid; > + > + /* > + * This is a thread-group leader exiting before > + * all of its subthreads have exited allow pidfd > + * polling to detect this case and delay exit > + * notification until the last thread has > + * exited. > + */ > + pid = task_pid(tsk); > + WRITE_ONCE(pid->delayed_leader, 1); This is racy: tsk->exit_state is already set, so pidfd_poll() can see task->exit_state && !pid->delayed_leader. But this is minor. I can't understand all these complications, probably because I barely slept last night ;) I will re-read this patch tomorrow, but could you explain why we can't simply use the trivial patch below? Oleg. --- diff --git a/fs/pidfs.c b/fs/pidfs.c index d980f779c213..8a95920aed98 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -210,7 +210,6 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct pid *pid = pidfd_pid(file); - bool thread = file->f_flags & PIDFD_THREAD; struct task_struct *task; __poll_t poll_flags = 0; @@ -223,7 +222,7 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) task = pid_task(pid, PIDTYPE_PID); if (!task) poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP; - else if (task->exit_state && (thread || thread_group_empty(task))) + else if (task->exit_state && !delay_group_leader(task)) poll_flags = EPOLLIN | EPOLLRDNORM; return poll_flags; diff --git a/kernel/exit.c b/kernel/exit.c index 9916305e34d3..356ca41d313b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -746,7 +746,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * sub-thread or 
delay_group_leader(), wake up the * PIDFD_THREAD waiters. */ - if (!thread_group_empty(tsk)) + if (!delay_group_leader(tsk)) do_notify_pidfd(tsk); if (unlikely(tsk->ptrace)) {