[RFC 2/2] io_uring: examine request result only after completion

__io_submit_sqe() checks the result of a request for -EAGAIN in
polled mode without first ensuring that the request has completed.
The request will be immediately resubmitted in io_sq_wq_submit_work(),
potentially before the first submission has completed.  This creates
a race where the original completion may set REQ_F_IOPOLL_COMPLETED in
a freed submission request.

Move a submitted request to the poll list unconditionally in polled mode.
The request can then be retried if necessary once the original submission
has actually completed.
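
For reference, the interleaving being fixed can be reduced to roughly the
following standalone C sketch.  The struct and flag names mirror the kernel
code but are only stand-ins here; the sketch models the ordering problem,
not the actual io_uring implementation:

/*
 * Simplified userspace model of the race described above: the submission
 * path acts on -EAGAIN before the original completion has run.
 */
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

#define REQ_F_IOPOLL_COMPLETED	0x1

struct fake_req {
	int		result;
	unsigned int	flags;
	bool		freed;		/* stand-in for kmem_cache_free() */
};

int main(void)
{
	struct fake_req req = { .result = -EAGAIN };

	/*
	 * Old behaviour: __io_submit_sqe() inspects req->result before the
	 * request has completed, returns -EAGAIN, and io_sq_wq_submit_work()
	 * resubmits; the original request may already be freed by then.
	 */
	if (req.result == -EAGAIN)
		req.freed = true;	/* request freed and resubmitted */

	/*
	 * The original completion (io_complete_rw_iopoll) then fires and
	 * writes into the request it was handed.
	 */
	if (req.freed)
		printf("completion would set REQ_F_IOPOLL_COMPLETED in freed memory\n");
	else
		req.flags |= REQ_F_IOPOLL_COMPLETED;

	return 0;
}

With this patch the -EAGAIN check is deferred to io_iopoll_complete(), which
only runs once REQ_F_IOPOLL_COMPLETED has been observed, so the resubmission
can no longer race with the original completion.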

Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@xxxxxxxxxx>
Reviewed-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
---
 fs/io_uring.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index acb213c..9e2eef5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -328,6 +328,7 @@ struct io_kiocb {
 #define REQ_F_TIMEOUT		1024	/* timeout request */
 #define REQ_F_ISREG		2048	/* regular file */
 #define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
+#define REQ_F_SQE_ALLOC		8192	/* dynamically allocated sqe */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -657,6 +658,16 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
 {
 	if (*nr) {
+		int i;
+		struct io_kiocb *req;
+
+		for (i = 0; i < *nr; i++) {
+			req = reqs[i];
+			if (req->flags & REQ_F_SQE_ALLOC) {
+				kfree(req->submit.sqe);
+				req->submit.sqe = NULL;
+			}
+		}
 		kmem_cache_free_bulk(req_cachep, *nr, reqs);
 		percpu_ref_put_many(&ctx->refs, *nr);
 		*nr = 0;
@@ -668,6 +679,10 @@ static void __io_free_req(struct io_kiocb *req)
 	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
 		fput(req->file);
 	percpu_ref_put(&req->ctx->refs);
+	if (req->flags & REQ_F_SQE_ALLOC) {
+		kfree(req->submit.sqe);
+		req->submit.sqe = NULL;
+	}
 	kmem_cache_free(req_cachep, req);
 }
 
@@ -789,6 +804,29 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
+		/*
+		 * If a retry is needed, reset the request and queue
+		 * it for async submission.
+		 */
+		if (req->result == -EAGAIN) {
+			int ret;
+
+			refcount_set(&req->refs, 2);
+			req->flags &= ~REQ_F_IOPOLL_COMPLETED;
+			req->result = 0;
+
+			ret = io_queue_async(ctx, req, &req->submit);
+			if (!ret)
+				continue;
+
+			/*
+			 * The async submission failed.  Mark the
+			 * request as complete.
+			 */
+			refcount_set(&req->refs, 1);
+			req->flags |= REQ_F_IOPOLL_COMPLETED;
+		}
+
 		io_cqring_fill_event(ctx, req->user_data, req->result);
 		(*nr_events)++;
 
@@ -835,9 +873,14 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		 * Move completed entries to our local list. If we find a
 		 * request that requires polling, break out and complete
 		 * the done list first, if we have entries there.
+		 * Make sure the completed entries have been properly
+		 * reference counted before moving them.  This accounts
+		 * for a race where a request can complete before its
+		 * reference count is decremented after the submission.
 		 */
 		if (req->flags & REQ_F_IOPOLL_COMPLETED) {
-			list_move_tail(&req->list, &done);
+			if (refcount_read(&req->refs) < 2)
+				list_move_tail(&req->list, &done);
 			continue;
 		}
 		if (!list_empty(&done))
@@ -1007,8 +1050,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 	if ((req->flags & REQ_F_LINK) && res != req->result)
 		req->flags |= REQ_F_FAIL_LINK;
 	req->result = res;
-	if (res != -EAGAIN)
-		req->flags |= REQ_F_IOPOLL_COMPLETED;
+	req->flags |= REQ_F_IOPOLL_COMPLETED;
 }
 
 /*
@@ -2116,6 +2158,7 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
 	memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
 	req->submit.sqe = sqe_copy;
+	req->flags |= REQ_F_SQE_ALLOC;
 
 	INIT_WORK(&req->work, io_sq_wq_submit_work);
 	trace_io_uring_defer(ctx, req, false);
@@ -2189,9 +2232,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return ret;
 
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
-		if (req->result == -EAGAIN)
-			return -EAGAIN;
-
+		/*
+		 * Add the request to the poll list unconditionally.
+		 * The request may be resubmitted in case of EAGAIN
+		 * during completion processing.
+		 */
+		memcpy(&req->submit, s, sizeof(*s));
 		/* workqueue context doesn't hold uring_lock, grab it now */
 		if (s->in_async)
 			mutex_lock(&ctx->uring_lock);
@@ -2284,9 +2330,6 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 			io_put_req(req, NULL);
 		}
 
-		/* async context always use a copy of the sqe */
-		kfree(sqe);
-
 		/* if a dependent link is ready, do that as the next one */
 		if (!ret && nxt) {
 			req = nxt;
@@ -2451,6 +2494,7 @@ static int io_queue_async(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return -ENOMEM;
 
 	s->sqe = sqe_copy;
+	req->flags |= REQ_F_SQE_ALLOC;
 
 	memcpy(&req->submit, s, sizeof(*s));
 	list = io_async_list_from_sqe(ctx, s->sqe);
@@ -2607,6 +2651,7 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 		}
 
 		s->sqe = sqe_copy;
+		req->flags |= REQ_F_SQE_ALLOC;
 		memcpy(&req->submit, s, sizeof(*s));
 		trace_io_uring_link(ctx, req, prev);
 		list_add_tail(&req->list, &prev->link_list);
-- 
1.8.3.1