[PATCH 3/3] io_uring: set plug tags for same file

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Keith Busch <kbusch@xxxxxxxxxx>

io_uring tries to optimize allocating tags by hinting to the plug how
many it expects to need for a batch instead of allocating each tag
individually. But io_uring submission queues may have a mix of many
devices for io, so the number of io's counted may be overestimated. This
can lead to allocating too many tags, which adds overhead to finding
that many contiguous tags, freeing up the ones we didn't use, and may
starve out other users that can actually use them.

When starting a new batch of uring commands, count only commands that
match the file descriptor of the first one seen for this optimization.
This avoids having to call the unlikely "blk_mq_free_plug_rqs()" at the
end of a submission when multiple devices are used in a batch.

Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
---
 block/blk-core.c               | 49 +++++++++++++++-------------------
 block/blk-mq.c                 |  6 +++--
 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            | 19 ++++++++-----
 4 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 90de50082146a..72523a983c419 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1043,32 +1043,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
 
-void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
-{
-	struct task_struct *tsk = current;
-
-	/*
-	 * If this is a nested plug, don't actually assign it.
-	 */
-	if (tsk->plug)
-		return;
-
-	plug->mq_list = NULL;
-	plug->cached_rq = NULL;
-	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
-	plug->rq_count = 0;
-	plug->multiple_queues = false;
-	plug->has_elevator = false;
-	plug->nowait = false;
-	INIT_LIST_HEAD(&plug->cb_list);
-
-	/*
-	 * Store ordering should not be needed here, since a potential
-	 * preempt will imply a full memory barrier
-	 */
-	tsk->plug = plug;
-}
-
 /**
  * blk_start_plug - initialize blk_plug and track it inside the task_struct
  * @plug:	The &struct blk_plug that needs to be initialized
@@ -1094,7 +1068,28 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
  */
 void blk_start_plug(struct blk_plug *plug)
 {
-	blk_start_plug_nr_ios(plug, 1);
+	struct task_struct *tsk = current;
+
+	/*
+	 * If this is a nested plug, don't actually assign it.
+	 */
+	if (tsk->plug)
+		return;
+
+	plug->mq_list = NULL;
+	plug->cached_rq = NULL;
+	plug->nr_ios = 1;
+	plug->rq_count = 0;
+	plug->multiple_queues = false;
+	plug->has_elevator = false;
+	plug->nowait = false;
+	INIT_LIST_HEAD(&plug->cb_list);
+
+	/*
+	 * Store ordering should not be needed here, since a potential
+	 * preempt will imply a full memory barrier
+	 */
+	tsk->plug = plug;
 }
 EXPORT_SYMBOL(blk_start_plug);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f14b8669ac69f..1e18ccd7d1376 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -524,7 +524,8 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
 		.q		= q,
 		.flags		= flags,
 		.cmd_flags	= opf,
-		.nr_tags	= plug->nr_ios,
+		.nr_tags	= min_t(unsigned int, plug->nr_ios,
+					BLK_MAX_REQUEST_COUNT),
 		.cached_rq	= &plug->cached_rq,
 	};
 	struct request *rq;
@@ -2867,7 +2868,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_throttle(q, bio);
 
 	if (plug) {
-		data.nr_tags = plug->nr_ios;
+		data.nr_tags = min_t(unsigned int, plug->nr_ios,
+				     BLK_MAX_REQUEST_COUNT);
 		plug->nr_ios = 1;
 		data.cached_rq = &plug->cached_rq;
 	}
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 598553877fc25..6d922e7749989 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -175,6 +175,7 @@ struct io_submit_state {
 	bool			need_plug;
 	unsigned short		submit_nr;
 	unsigned int		cqes_count;
+	int			fd;
 	struct blk_plug		plug;
 	struct io_uring_cqe	cqes[16];
 };
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5434aef0a8ef7..379e41b53efde 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2209,18 +2209,25 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return -EINVAL;
 
 	if (def->needs_file) {
-		struct io_submit_state *state = &ctx->submit_state;
-
 		req->cqe.fd = READ_ONCE(sqe->fd);
 
 		/*
 		 * Plug now if we have more than 2 IO left after this, and the
 		 * target is potentially a read/write to block based storage.
 		 */
-		if (state->need_plug && def->plug) {
-			state->plug_started = true;
-			state->need_plug = false;
-			blk_start_plug_nr_ios(&state->plug, state->submit_nr);
+		if (def->plug) {
+			struct io_submit_state *state = &ctx->submit_state;
+
+			if (state->need_plug) {
+				state->plug_started = true;
+				state->need_plug = false;
+				state->fd = req->cqe.fd;
+				blk_start_plug(&state->plug);
+			} else if (state->plug_started &&
+				   state->fd == req->cqe.fd &&
+				   !state->link.head) {
+				state->plug.nr_ios++;
+			}
 		}
 	}
 
-- 
2.34.1





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux