Mark the plug with nowait == true, which will cause requests to avoid
blocking on request allocation. If a request does block, we catch it and
add it to the plug list. Once we finish the plug, re-issue the requests
that got caught.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
Since v1:
- Properly re-prep on resubmit
- Sanity check for io_wq_current_is_worker()
- Sanity check we're just doing read/write for re-issue
- Ensure iov state is sane for resubmit

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 625578715d37..942984bda2f8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1947,12 +1947,31 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 	__io_cqring_add_event(req, res, cflags);
 }
 
+static bool io_rw_reissue(struct io_kiocb *req, long res)
+{
+#ifdef CONFIG_BLOCK
+	struct blk_plug *plug;
+
+	if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
+		return false;
+
+	plug = current->plug;
+	if (plug && plug->nowait) {
+		list_add_tail(&req->list, &plug->nowait_list);
+		return true;
+	}
+#endif
+	return false;
+}
+
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
-	io_complete_rw_common(kiocb, res);
-	io_put_req(req);
+	if (!io_rw_reissue(req, res)) {
+		io_complete_rw_common(kiocb, res);
+		io_put_req(req);
+	}
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -2629,6 +2648,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
+		unsigned long nr_segs = iter.nr_segs;
 		ssize_t ret2 = 0;
 
 		if (req->file->f_op->read_iter)
@@ -2640,6 +2660,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 		if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
 			kiocb_done(kiocb, ret2);
 		} else {
+			iter.count = iov_count;
+			iter.nr_segs = nr_segs;
 copy_iov:
 			ret = io_setup_async_rw(req, io_size, iovec,
 						inline_vecs, &iter);
@@ -2726,6 +2748,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
 	if (!ret) {
+		unsigned long nr_segs = iter.nr_segs;
 		ssize_t ret2;
 
 		/*
@@ -2763,6 +2786,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
 		if (!force_nonblock || ret2 != -EAGAIN) {
 			kiocb_done(kiocb, ret2);
 		} else {
+			iter.count = iov_count;
+			iter.nr_segs = nr_segs;
 copy_iov:
 			ret = io_setup_async_rw(req, io_size, iovec,
 						inline_vecs, &iter);
@@ -5789,12 +5814,70 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
+#ifdef CONFIG_BLOCK
+static bool io_resubmit_prep(struct io_kiocb *req)
+{
+	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+	struct iov_iter iter;
+	ssize_t ret;
+	int rw;
+
+	switch (req->opcode) {
+	case IORING_OP_READV:
+	case IORING_OP_READ_FIXED:
+	case IORING_OP_READ:
+		rw = READ;
+		break;
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+	case IORING_OP_WRITE:
+		rw = WRITE;
+		break;
+	default:
+		printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n",
+				req->opcode);
+		goto end_req;
+	}
+
+	ret = io_import_iovec(rw, req, &iovec, &iter, false);
+	if (ret < 0)
+		goto end_req;
+	ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter);
+	if (!ret)
+		return true;
+	kfree(iovec);
+end_req:
+	io_cqring_add_event(req, ret);
+	req_set_fail_links(req);
+	io_put_req(req);
+	return false;
+}
+
+static void io_resubmit_rw(struct list_head *list)
+{
+	struct io_kiocb *req;
+
+	while (!list_empty(list)) {
+		req = list_first_entry(list, struct io_kiocb, list);
+		list_del(&req->list);
+		if (io_resubmit_prep(req)) {
+			refcount_inc(&req->refs);
+			io_queue_async_work(req);
+		}
+	}
+}
+#endif
+
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
 static void io_submit_state_end(struct io_submit_state *state)
 {
 	blk_finish_plug(&state->plug);
+#ifdef CONFIG_BLOCK
+	if (unlikely(!list_empty(&state->plug.nowait_list)))
+		io_resubmit_rw(&state->plug.nowait_list);
+#endif
 	io_state_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
@@ -5807,6 +5890,10 @@ static void io_submit_state_start(struct io_submit_state *state,
 				  unsigned int max_ios)
 {
 	blk_start_plug(&state->plug);
+#ifdef CONFIG_BLOCK
+	INIT_LIST_HEAD(&state->plug.nowait_list);
+	state->plug.nowait = true;
+#endif
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;

-- 
Jens Axboe
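
A note on the block-layer dependency: the plug fields used above
(plug->nowait and plug->nowait_list) are not added by this patch; an
earlier change in the series is assumed to extend struct blk_plug and to
make the submission path honor the flag (e.g. by issuing plugged I/O
with REQ_NOWAIT). A rough sketch of what that assumed blkdev.h change
might look like, with field names taken from the uses above and the
exact layout hypothetical:

struct blk_plug {
	struct list_head mq_list;	/* existing: deferred blk-mq requests */
	struct list_head cb_list;	/* existing: unplug callbacks */
	unsigned short rq_count;
	bool multiple_queues;
	/* assumed additions for nowait re-issue: */
	struct list_head nowait_list;	/* requests caught after -EAGAIN */
	bool nowait;			/* issue plugged I/O without blocking */
};

With something like that in place, io_submit_state_start() sets
plug.nowait and initializes the list, the completion path parks
-EAGAIN/-EOPNOTSUPP requests there via io_rw_reissue(), and
io_submit_state_end() drains the list after the plug is finished.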