Re: [PATCH 4/4] io_uring: flush task work before waiting for ring exit

Oleg Nesterov <oleg@xxxxxxxxxx> · Tue, 7 Apr 2020 18:38:17 +0200

On 04/07, Oleg Nesterov wrote:
>
> On 04/07, Jens Axboe wrote:
> >
> > --- a/fs/io_uring.c
> > +++ b/fs/io_uring.c
> > @@ -7293,10 +7293,15 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
> >  		io_wq_cancel_all(ctx->io_wq);
> >
> >  	io_iopoll_reap_events(ctx);
> > +	idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
> > +
> > +	if (current->task_works != &task_work_exited)
> > +		task_work_run();
>
> this is still wrong, please see the email I sent a minute ago.

Let me try to explain in case it was not clear. Let's forget about io_uring.

	void bad_work_func(struct callback_head *cb)
	{
		task_work_run();
	}

	...

	init_task_work(&my_work, bad_work_func);

	task_work_add(task, &my_work);

If the "task" above is exiting, the kernel will crash: the 2nd
task_work_run(), called by bad_work_func(), will install work_exited. Then
we return to the task_work_run() which was called by exit_task_work(); it
will notice ->task_works != NULL, restart the main loop, and execute
work_exited->fn == NULL.

Again, if we want to allow task_work_run() in do_exit() paths we need
something like below. But I still do not understand why we need this :/

Oleg.

diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index bd9a6a91c097..c9f36d233c39 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -15,11 +15,16 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
 
 int task_work_add(struct task_struct *task, struct callback_head *twork, bool);
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
-void task_work_run(void);
+void __task_work_run(bool is_exit);
+
+static inline void task_work_run(void)
+{
+	__task_work_run(false);
+}
 
 static inline void exit_task_work(struct task_struct *task)
 {
-	task_work_run();
+	__task_work_run(true);
 }
 
 #endif	/* _LINUX_TASK_WORK_H */
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 825f28259a19..7b26203a583e 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -87,7 +87,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
  * it exits. In the latter case task_work_add() can no longer add the
  * new work after task_work_run() returns.
  */
-void task_work_run(void)
+void __task_work_run(bool is_exit)
 {
 	struct task_struct *task = current;
 	struct callback_head *work, *head, *next;
@@ -101,7 +101,7 @@ void task_work_run(void)
 			head = NULL;
 			work = READ_ONCE(task->task_works);
 			if (!work) {
-				if (task->flags & PF_EXITING)
+				if (is_exit)
 					head = &work_exited;
 				else
 					break;