[PATCH 12/33] io_uring: signal worker thread unshare

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



If the original task switches credentials or unshares any part of the
task state, then we should notify the io_uring workers so they can
re-fork as well. For credentials, this actually happens just fine for
the io-wq workers, as we grab and pass them down. For SQPOLL, we're
stuck with the original credentials, which means that it cannot be used
if the task does e.g. seteuid().

For unshare(2), the story is the same, except that a task cannot call it
and expect the workers to assume the new identity.

Fix this up by just having the threads exit and re-fork if the ring task
does seteuid() (and friends), or does unshare(2) on any part of the
task state.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io-wq.c               | 21 ++++++++++++++++-----
 fs/io-wq.h               |  1 +
 fs/io_uring.c            | 26 ++++++++++++++++++++++++--
 include/linux/io_uring.h |  9 +++++++++
 kernel/cred.c            |  2 ++
 kernel/fork.c            |  2 ++
 6 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 65ae35ca8dba..c24473231eee 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -744,6 +744,7 @@ static int io_wq_manager(void *data)
 {
 	struct io_wq *wq = data;
 	char buf[TASK_COMM_LEN];
+	int node;
 
 	sprintf(buf, "iou-mgr-%d", wq->task_pid);
 	set_task_comm(current, buf);
@@ -761,6 +762,12 @@ static int io_wq_manager(void *data)
 	} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
 
 	io_wq_check_workers(wq);
+
+	rcu_read_lock();
+	for_each_node(node)
+		io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
+	rcu_read_unlock();
+
 	/* we might not ever have created any workers */
 	if (atomic_read(&wq->worker_refs))
 		wait_for_completion(&wq->worker_done);
@@ -1097,11 +1104,6 @@ static void io_wq_destroy(struct io_wq *wq)
 	set_bit(IO_WQ_BIT_EXIT, &wq->state);
 	io_wq_destroy_manager(wq);
 
-	rcu_read_lock();
-	for_each_node(node)
-		io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
-	rcu_read_unlock();
-
 	spin_lock_irq(&wq->hash->wait.lock);
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
@@ -1165,3 +1167,12 @@ static __init int io_wq_init(void)
 	return 0;
 }
 subsys_initcall(io_wq_init);
+
+void io_wq_unshare(struct io_wq *wq)
+{
+	refcount_inc(&wq->refs);
+	set_bit(IO_WQ_BIT_EXIT, &wq->state);
+	io_wq_destroy_manager(wq);
+	clear_bit(IO_WQ_BIT_EXIT, &wq->state);
+	io_wq_put(wq);
+}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index f6ef433df8a8..57e478af1e1d 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -115,6 +115,7 @@ struct io_wq_data {
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
 void io_wq_put(struct io_wq *wq);
 void io_wq_put_and_exit(struct io_wq *wq);
+void io_wq_unshare(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 83973f6b3c0a..f89d7375a7c3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8955,6 +8955,24 @@ void __io_uring_task_cancel(void)
 	io_uring_remove_task_files(tctx);
 }
 
+void __io_uring_unshare(void)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	struct file *file;
+	unsigned long index;
+
+	io_wq_unshare(tctx->io_wq);
+	if (!tctx->sqpoll)
+		return;
+
+	xa_for_each(&tctx->xa, index, file) {
+		struct io_ring_ctx *ctx = file->private_data;
+
+		if (ctx->sq_data)
+			io_sq_thread_stop(ctx->sq_data);
+	}
+}
+
 static int io_uring_flush(struct file *file, void *data)
 {
 	struct io_uring_task *tctx = current->io_uring;
@@ -9170,10 +9188,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
 		if (unlikely(ctx->sqo_exec)) {
-			ret = io_sq_thread_fork(ctx->sq_data, ctx);
+			struct io_sq_data *sqd = ctx->sq_data;
+
+			ret = io_sq_thread_fork(sqd, ctx);
+			if (ret)
+				set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+			complete(&sqd->startup);
 			if (ret)
 				goto out;
-			ctx->sqo_exec = 0;
 		}
 		ret = -EOWNERDEAD;
 		if (unlikely(ctx->sqo_dead))
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 51ede771cd99..bfe2fcb4f478 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -35,7 +35,13 @@ struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_task_cancel(void);
 void __io_uring_files_cancel(struct files_struct *files);
 void __io_uring_free(struct task_struct *tsk);
+void __io_uring_unshare(void);
 
+static inline void io_uring_unshare(void)
+{
+	if (current->io_uring)
+		__io_uring_unshare();
+}
 static inline void io_uring_task_cancel(void)
 {
 	if (current->io_uring && !xa_empty(&current->io_uring->xa))
@@ -56,6 +62,9 @@ static inline struct sock *io_uring_get_socket(struct file *file)
 {
 	return NULL;
 }
+static inline void io_uring_unshare(void)
+{
+}
 static inline void io_uring_task_cancel(void)
 {
 }
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b1149c651..324e3ee61e1d 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -16,6 +16,7 @@
 #include <linux/binfmts.h>
 #include <linux/cn_proc.h>
 #include <linux/uidgid.h>
+#include <linux/io_uring.h>
 
 #if 0
 #define kdebug(FMT, ...)						\
@@ -509,6 +510,7 @@ int commit_creds(struct cred *new)
 	/* release the old obj and subj refs both */
 	put_cred(old);
 	put_cred(old);
+	io_uring_unshare();
 	return 0;
 }
 EXPORT_SYMBOL(commit_creds);
diff --git a/kernel/fork.c b/kernel/fork.c
index d66cd1014211..5d1b00083c9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2999,6 +2999,8 @@ int ksys_unshare(unsigned long unshare_flags)
 			commit_creds(new_cred);
 			new_cred = NULL;
 		}
+
+		io_uring_unshare();
 	}
 
 	perf_event_namespaces(current);
-- 
2.30.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux