Remove the SQPOLL thread from the ctx, and use the io_sq_data as the
data structure we pass in. io_sq_data has a list of ctx's that we can
then iterate over and handle.

As of now we're ready to handle multiple ctx's, though we're still just
handling a single one after this patch.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c | 101 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 72 insertions(+), 29 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 35ea69aad9c0..5bafc7a2c65c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,6 +231,12 @@ struct io_restriction {
 
 struct io_sq_data {
 	refcount_t		refs;
+
+	/* ctx's that are using this sqd */
+	struct list_head	ctx_list;
+	struct list_head	ctx_new_list;
+	struct mutex		ctx_lock;
+
 	struct task_struct	*thread;
 	struct wait_queue_head	wait;
 };
@@ -290,6 +296,7 @@ struct io_ring_ctx {
 	struct files_struct	*sqo_files;
 
 	struct wait_queue_entry	sqo_wait_entry;
+	struct list_head	sqd_list;
 
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
@@ -1090,6 +1097,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
+	INIT_LIST_HEAD(&ctx->sqd_list);
 	init_waitqueue_head(&ctx->cq_wait);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	init_completion(&ctx->ref_comp);
@@ -6712,49 +6720,74 @@ static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
 	return SQT_DID_WORK;
 }
 
+static void io_sqd_init_new(struct io_sq_data *sqd)
+{
+	struct io_ring_ctx *ctx;
+
+	while (!list_empty(&sqd->ctx_new_list)) {
+		ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list);
+		init_wait(&ctx->sqo_wait_entry);
+		ctx->sqo_wait_entry.func = io_sq_wake_function;
+		list_move_tail(&ctx->sqd_list, &sqd->ctx_list);
+		complete(&ctx->sq_thread_comp);
+	}
+}
+
 static int io_sq_thread(void *data)
 {
-	struct io_ring_ctx *ctx = data;
-	const struct cred *old_cred;
+	struct io_sq_data *sqd = data;
+	struct io_ring_ctx *ctx;
 	unsigned long start_jiffies;
 
-	init_wait(&ctx->sqo_wait_entry);
-	ctx->sqo_wait_entry.func = io_sq_wake_function;
+	start_jiffies = jiffies;
+	while (!kthread_should_park()) {
+		const struct cred *old_cred = NULL;
+		enum sq_ret ret = 0;
 
-	complete(&ctx->sq_thread_comp);
+		mutex_lock(&sqd->ctx_lock);
 
-	old_cred = override_creds(ctx->creds);
+		if (unlikely(!list_empty(&sqd->ctx_new_list)))
+			io_sqd_init_new(sqd);
 
-	task_lock(current);
-	current->files = ctx->sqo_files;
-	task_unlock(current);
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+			if (current->cred != ctx->creds) {
+				if (old_cred)
+					revert_creds(old_cred);
+				old_cred = override_creds(ctx->creds);
+			}
 
-	start_jiffies = jiffies;
-	while (!kthread_should_park()) {
-		enum sq_ret ret;
+			if (current->files != ctx->sqo_files) {
+				task_lock(current);
+				current->files = ctx->sqo_files;
+				task_unlock(current);
+			}
 
-		ret = __io_sq_thread(ctx, start_jiffies);
-		switch (ret) {
-		case SQT_IDLE:
-			schedule();
-			start_jiffies = jiffies;
-			continue;
-		case SQT_SPIN:
+			ret |= __io_sq_thread(ctx, start_jiffies);
+
+			io_sq_thread_drop_mm();
+		}
+
+		mutex_unlock(&sqd->ctx_lock);
+
+		if (old_cred)
+			revert_creds(old_cred);
+
+		if (ret & SQT_SPIN) {
 			io_run_task_work();
 			cond_resched();
-			fallthrough;
-		case SQT_DID_WORK:
-			continue;
+		} else if (ret == SQT_IDLE) {
+			schedule();
+			start_jiffies = jiffies;
 		}
 	}
 
 	io_run_task_work();
 
-	io_sq_thread_drop_mm();
-	task_lock(current);
-	current->files = NULL;
-	task_unlock(current);
-	revert_creds(old_cred);
+	if (current->files) {
+		task_lock(current);
+		current->files = NULL;
+		task_unlock(current);
+	}
 
 	kthread_parkme();
 
@@ -6955,6 +6988,9 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
 		return ERR_PTR(-ENOMEM);
 
 	refcount_set(&sqd->refs, 1);
+	INIT_LIST_HEAD(&sqd->ctx_list);
+	INIT_LIST_HEAD(&sqd->ctx_new_list);
+	mutex_init(&sqd->ctx_lock);
 	init_waitqueue_head(&sqd->wait);
 	return sqd;
 }
@@ -6967,6 +7003,10 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 		if (sqd->thread)
 			wait_for_completion(&ctx->sq_thread_comp);
 
+		mutex_lock(&sqd->ctx_lock);
+		list_del(&ctx->sqd_list);
+		mutex_unlock(&sqd->ctx_lock);
+
 		io_put_sq_data(sqd);
 		ctx->sq_data = NULL;
 	}
@@ -7620,6 +7660,9 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 			goto err;
 		}
 		ctx->sq_data = sqd;
+		mutex_lock(&sqd->ctx_lock);
+		list_add(&ctx->sqd_list, &sqd->ctx_new_list);
+		mutex_unlock(&sqd->ctx_lock);
 
 		/*
 		 * We will exit the sqthread before current exits, so we can
@@ -7641,10 +7684,10 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 			if (!cpu_online(cpu))
 				goto err;
 
-			sqd->thread = kthread_create_on_cpu(io_sq_thread, ctx,
+			sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
 							cpu, "io_uring-sq");
 		} else {
-			sqd->thread = kthread_create(io_sq_thread, ctx,
+			sqd->thread = kthread_create(io_sq_thread, sqd,
 							"io_uring-sq");
 		}
 		if (IS_ERR(sqd->thread)) {
-- 
2.28.0
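To illustrate the handoff the commit message describes, here is a minimal
userspace sketch of the same staging-list pattern. This is not kernel code:
sq_data, ring_ctx, sqd_attach, sqd_init_new and sqd_poll_once are hypothetical
stand-ins for io_sq_data, io_ring_ctx, the attach in io_sq_offload_create,
io_sqd_init_new and the io_sq_thread loop body, with a pthread mutex in place
of the kernel mutex and <sys/queue.h> lists in place of list_head. The
property it demonstrates is the one the patch relies on: registration only
appends to ctx_new_list under ctx_lock, and the poller thread itself splices
entries onto ctx_list, so per-ctx setup (the wait-entry init in the real
patch) always runs in the poller thread's context, exactly once per ctx.

/*
 * Userspace sketch of the ctx_list/ctx_new_list handoff (assumed names,
 * not the kernel API). Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <sys/queue.h>

struct ring_ctx {
	int id;
	int initialized;
	TAILQ_ENTRY(ring_ctx) sqd_list;	/* on ctx_new_list, then ctx_list */
};

struct sq_data {
	pthread_mutex_t ctx_lock;
	TAILQ_HEAD(, ring_ctx) ctx_list;	/* live, being polled */
	TAILQ_HEAD(, ring_ctx) ctx_new_list;	/* staged, awaiting init */
};

/* Registration side: cheap, only touches the staging list. */
static void sqd_attach(struct sq_data *sqd, struct ring_ctx *ctx)
{
	pthread_mutex_lock(&sqd->ctx_lock);
	TAILQ_INSERT_TAIL(&sqd->ctx_new_list, ctx, sqd_list);
	pthread_mutex_unlock(&sqd->ctx_lock);
}

/* Poller side: called with ctx_lock held, mirrors io_sqd_init_new(). */
static void sqd_init_new(struct sq_data *sqd)
{
	struct ring_ctx *ctx;

	while ((ctx = TAILQ_FIRST(&sqd->ctx_new_list)) != NULL) {
		ctx->initialized = 1;	/* per-ctx setup in thread context */
		TAILQ_REMOVE(&sqd->ctx_new_list, ctx, sqd_list);
		TAILQ_INSERT_TAIL(&sqd->ctx_list, ctx, sqd_list);
	}
}

/* One pass of the poller loop, mirroring the shape of io_sq_thread(). */
static void sqd_poll_once(struct sq_data *sqd)
{
	struct ring_ctx *ctx;

	pthread_mutex_lock(&sqd->ctx_lock);
	if (!TAILQ_EMPTY(&sqd->ctx_new_list))
		sqd_init_new(sqd);
	TAILQ_FOREACH(ctx, &sqd->ctx_list, sqd_list)
		printf("polled ctx %d (initialized=%d)\n",
		       ctx->id, ctx->initialized);
	pthread_mutex_unlock(&sqd->ctx_lock);
}

int main(void)
{
	struct sq_data sqd;
	struct ring_ctx a = { .id = 1 }, b = { .id = 2 };

	pthread_mutex_init(&sqd.ctx_lock, NULL);
	TAILQ_INIT(&sqd.ctx_list);
	TAILQ_INIT(&sqd.ctx_new_list);

	sqd_attach(&sqd, &a);
	sqd_poll_once(&sqd);	/* picks up and initializes ctx 1 */
	sqd_attach(&sqd, &b);
	sqd_poll_once(&sqd);	/* ctx 2 joins; both are polled */
	return 0;
}

The same reasoning explains why io_sq_thread() in the patch re-checks creds
and ->files per ctx inside the loop: once several rings share one sqd, each
iteration may run on behalf of a different owner, so per-ctx state can no
longer be set up once outside the loop.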