[PATCH 2/2] io_uring: internally retry short reads

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We've had a few application cases of not handling short reads properly,
and it is understandable as short reads aren't really expected if the
application isn't doing non-blocking IO.

Now that we retain the iov_iter over retries, we can implement internal
retry pretty trivially. This ensures that we don't return a short read,
even for buffered reads on page cache conflicts.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/io_uring.c | 72 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a20fccf91d76..c6a9f1b11e85 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -509,6 +509,7 @@ struct io_async_msghdr {
 struct io_async_rw {
 	struct iovec			fast_iov[UIO_FASTIOV];
 	struct iov_iter			iter;
+	size_t				bytes_done;
 	struct wait_page_queue		wpq;
 };
 
@@ -914,7 +915,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       struct iovec **iovec, struct iov_iter *iter,
 			       bool needs_lock);
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iovec *fast_iov, struct iov_iter *iter);
+			     struct iovec *fast_iov, struct iov_iter *iter,
+			     bool force);
 
 static struct kmem_cache *req_cachep;
 
@@ -2296,7 +2298,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 	ret = io_import_iovec(rw, req, &iovec, &iter, false);
 	if (ret < 0)
 		goto end_req;
-	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter);
+	ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
 	if (!ret)
 		return true;
 	kfree(iovec);
@@ -2586,6 +2588,14 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
+	/* add previously done IO, if any */
+	if (req->io && req->io->rw.bytes_done > 0) {
+		if (ret < 0)
+			ret = req->io->rw.bytes_done;
+		else
+			ret += req->io->rw.bytes_done;
+	}
+
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
@@ -2932,6 +2942,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
 	struct io_async_rw *rw = &req->io->rw;
 
 	memcpy(&rw->iter, iter, sizeof(*iter));
+	rw->bytes_done = 0;
 	if (!iovec) {
 		rw->iter.iov = rw->fast_iov;
 		if (rw->iter.iov != fast_iov)
@@ -2958,9 +2969,10 @@ static int io_alloc_async_ctx(struct io_kiocb *req)
 }
 
 static int io_setup_async_rw(struct io_kiocb *req, struct iovec *iovec,
-			     struct iovec *fast_iov, struct iov_iter *iter)
+			     struct iovec *fast_iov, struct iov_iter *iter,
+			     bool force)
 {
-	if (!io_op_defs[req->opcode].async_ctx)
+	if (!force && !io_op_defs[req->opcode].async_ctx)
 		return 0;
 	if (!req->io) {
 		if (__io_alloc_async_ctx(req))
@@ -3084,8 +3096,7 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  * succeed, or in rare cases where it fails, we then fall back to using the
  * async worker threads for a blocking retry.
  */
-static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
-			       struct iovec *fast_iov, struct iov_iter *iter)
+static bool io_rw_should_retry(struct io_kiocb *req)
 {
 	struct kiocb *kiocb = &req->rw.kiocb;
 	int ret;
@@ -3094,8 +3105,8 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (req->flags & REQ_F_NOWAIT)
 		return false;
 
-	/* already tried, or we're doing O_DIRECT */
-	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_WAITQ))
+	/* Only for buffered IO */
+	if (kiocb->ki_flags & IOCB_DIRECT)
 		return false;
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
@@ -3104,16 +3115,6 @@ static bool io_rw_should_retry(struct io_kiocb *req, struct iovec *iovec,
 	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
 		return false;
 
-	/*
-	 * If request type doesn't require req->io to defer in general,
-	 * we need to allocate it here
-	 */
-	if (!req->io) {
-		if (__io_alloc_async_ctx(req))
-			return false;
-		io_req_map_rw(req, iovec, fast_iov, iter);
-	}
-
 	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
 						io_async_buf_func, req);
 	if (!ret) {
@@ -3167,29 +3168,46 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 
 	ret2 = io_iter_do_read(req, iter);
 
-	/* Catch -EAGAIN return for forced non-blocking submission */
-	if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
-		kiocb_done(kiocb, ret2, cs);
-	} else {
+	if (!iov_iter_count(iter))
+		goto done;
+
+	if (ret2 >= 0) {
+		/* successful read, but partial */
+		if (req->flags & REQ_F_NOWAIT)
+			goto done;
+		io_size -= ret2;
 copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
 		iovec = NULL;
+		/* now use our persistent iterator, if we aren't already */
+		iter = &req->io->rw.iter;
+
+		if (ret2 > 0) {
+retry:
+			req->io->rw.bytes_done += ret2;
+		}
 		/* if we can retry, do so with the callbacks armed */
-		if (io_rw_should_retry(req, iovec, inline_vecs, iter)) {
+		if (io_rw_should_retry(req)) {
 			ret2 = io_iter_do_read(req, iter);
 			if (ret2 == -EIOCBQUEUED) {
 				goto out_free;
+			} else if (ret2 == io_size) {
+				goto done;
 			} else if (ret2 != -EAGAIN) {
-				kiocb_done(kiocb, ret2, cs);
-				goto out_free;
+				/* we got some bytes, but not all. retry. */
+				if (ret2 > 0)
+					goto retry;
+				goto done;
 			}
 		}
 		kiocb->ki_flags &= ~IOCB_WAITQ;
 		return -EAGAIN;
 	}
+done:
+	kiocb_done(kiocb, ret2, cs);
 out_free:
 	if (iovec)
 		kfree(iovec);
@@ -3282,7 +3300,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 		kiocb_done(kiocb, ret2, cs);
 	} else {
 copy_iov:
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter);
+		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
 		if (ret)
 			goto out_free;
 		/* it's copied and will be cleaned with ->io */
-- 
2.28.0




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux