We've had a few cases of applications not handling short reads properly,
which is understandable since short reads aren't really expected if the
application isn't doing non-blocking IO.

Now that we retain the iov_iter over retries, we can implement internal
retry pretty trivially. This ensures that we don't return a short read,
even for buffered reads on page cache conflicts.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c | 72 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a20fccf91d76..c6a9f1b11e85 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -509,6 +509,7 @@ struct io_async_msghdr {
 struct io_async_rw {
 	struct iovec			fast_iov[UIO_FASTIOV];
 	struct iov_iter			iter;
+	size_t				bytes_done;
 	struct wait_page_queue		wpq;
 };
 
@@ -914,7 +915,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       struct iovec **iovec, struct iov_iter *iter,
 			       bool needs_lock);
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iovec *fast_iov, struct iov_iter *iter);
+			     struct iovec *fast_iov, struct iov_iter *iter,
+			     bool force);
 
 static struct kmem_cache *req_cachep;
 
@@ -2296,7 +2298,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 	ret = io_import_iovec(rw, req, &iovec, &iter, false);
 	if (ret < 0)
 		goto end_req;
-	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter);
+	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
 	if (!ret)
 		return true;
 	kfree(iovec);
@@ -2586,6 +2588,14 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
+	/* add previously done IO, if any */
+	if (req->io && req->io->rw.bytes_done > 0) {
+		if (ret < 0)
+			ret = req->io->rw.bytes_done;
+		else
+			ret += req->io->rw.bytes_done;
+	}
+
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
@@ -2932,6 +2942,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
 	struct io_async_rw *rw = &req->io->rw;
 
 	memcpy(&rw->iter, iter, sizeof(*iter));
+	rw->bytes_done = 0;
 	if (!iovec) {
 		rw->iter.iov = rw->fast_iov;
 		if (rw->iter.iov != fast_iov)
@@ -2958,9 +2969,10 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 }
 
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iovec *fast_iov, struct iov_iter *iter)
+			     struct iovec *fast_iov, struct iov_iter *iter,
+			     bool force)
 {
-	if (!io_op_defs[req->opcode].async_ctx)
+	if (!force && !io_op_defs[req->opcode].async_ctx)
 		return 0;
 	if (!req->io) {
 		if (__io_alloc_async_ctx(req))
@@ -3084,8 +3096,7 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
-			       struct iovec *fast_iov, struct iov_iter *iter)
+static bool io_rw_should_retry(struct io_kiocb *req)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
 	int ret;
@@ -3094,8 +3105,8 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (req->flags & REQ_F_NOWAIT)
 		return false;
 
-	/* already tried, or we're doing O_DIRECT */
-	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+	/* Only for buffered IO */
+	if (kiocb->ki_flags & IOCB_DIRECT)
 		return false;
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
@@ -3104,16 +3115,6 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
 		return false;
 
-	/*
-	 * If request type doesn't require req->io to defer in general,
-	 * we need to allocate it here
-	 */
-	if (!req->io) {
-		if (__io_alloc_async_ctx(req))
-			return false;
-		io_req_map_rw(req, iovec, fast_iov, iter);
-	}
-
 	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
 					 io_async_buf_func, req);
 	if (!ret) {
@@ -3167,29 +3168,46 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 
 	ret2 = io_iter_do_read(req, iter);
 
-	/* Catch -EAGAIN return for forced non-blocking submission */
-	if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
-		kiocb_done(kiocb, ret2, cs);
-	} else {
+	if (!iov_iter_count(iter))
+		goto done;
+
+	if (ret2 >= 0) {
+		/* successful read, but partial */
+		if (req->flags & REQ_F_NOWAIT)
+			goto done;
+		io_size -= ret2;
copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
 		iovec = NULL;
+		/* now use our persistent iterator, if we aren't already */
+		iter = &req->io->rw.iter;
+
+		if (ret2 > 0) {
+retry:
+			req->io->rw.bytes_done += ret2;
+		}
 		/* if we can retry, do so with the callbacks armed */
-		if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
+		if (io_rw_should_retry(req)) {
 			ret2 = io_iter_do_read(req, iter);
 			if (ret2 == -EIOCBQUEUED) {
 				goto out_free;
+			} else if (ret2 == io_size) {
+				goto done;
 			} else if (ret2 != -EAGAIN) {
-				kiocb_done(kiocb, ret2, cs);
-				goto out_free;
+				/* we got some bytes, but not all. retry. */
+				if (ret2 > 0)
+					goto retry;
+				goto done;
 			}
 		}
 		kiocb->ki_flags &= ~IOCB_WAITQ;
 		return -EAGAIN;
 	}
+done:
+	kiocb_done(kiocb, ret2, cs);
 out_free:
 	if (iovec)
 		kfree(iovec);
@@ -3282,7 +3300,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 		kiocb_done(kiocb, ret2, cs);
 	} else {
copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
-- 
2.28.0
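
For reference, here's a minimal userspace sketch of the application-visible behavior this targets. It's not part of the patch: it assumes liburing, and the file path and read size are made up. It submits one buffered read and compares cqe->res against the requested length. Before this change, a page cache conflict could complete that read short and the application would have to notice and resubmit for the remainder; with the internal retry, only EOF or an error should produce a short result.

/*
 * Illustrative only: single buffered read via io_uring, checking for a
 * short completion. Assumes liburing; /tmp/testfile is a placeholder.
 * Note that a read that hits EOF is still legitimately short.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <liburing.h>

#define READ_SIZE	(64 * 1024)

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char *buf;
	int fd, ret;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	fd = open("/tmp/testfile", O_RDONLY);	/* placeholder path */
	if (fd < 0)
		return 1;

	buf = malloc(READ_SIZE);
	if (!buf)
		return 1;

	/* queue one buffered (non-O_DIRECT) read of READ_SIZE bytes at offset 0 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, READ_SIZE, 0);
	io_uring_submit(&ring);

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0)
		return 1;

	if (cqe->res < 0)
		fprintf(stderr, "read failed: %s\n", strerror(-cqe->res));
	else if (cqe->res < READ_SIZE)
		fprintf(stderr, "short read: got %d of %d bytes\n",
			cqe->res, READ_SIZE);
	else
		printf("full read of %d bytes\n", cqe->res);

	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	close(fd);
	free(buf);
	return 0;
}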