From: Keith Busch <kbusch@xxxxxxxxxx> io_uring tries to optimize allocating tags by hinting to the plug how many it expects to need for a batch instead of allocating each tag individually. But io_uring submission queues may have a mix of many devices for io, so the number of io's counted may be overestimated. This can lead to allocating too many tags, which adds overhead to finding that many contiguous tags, freeing up the ones we didn't use, and may starve out other users that can actually use them. When starting a new batch of uring commands, count only commands that match the file descriptor of the first one seen for this optimization. This avoids having to call the unlikely "blk_mq_free_plug_rqs()" at the end of a submission when multiple devices are used in a batch. Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx> --- block/blk-core.c | 49 +++++++++++++++------------------- block/blk-mq.c | 6 +++-- include/linux/io_uring_types.h | 1 + io_uring/io_uring.c | 19 ++++++++----- 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 90de50082146a..72523a983c419 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1043,32 +1043,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); -void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) -{ - struct task_struct *tsk = current; - - /* - * If this is a nested plug, don't actually assign it. 
- */ - if (tsk->plug) - return; - - plug->mq_list = NULL; - plug->cached_rq = NULL; - plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); - plug->rq_count = 0; - plug->multiple_queues = false; - plug->has_elevator = false; - plug->nowait = false; - INIT_LIST_HEAD(&plug->cb_list); - - /* - * Store ordering should not be needed here, since a potential - * preempt will imply a full memory barrier - */ - tsk->plug = plug; -} - /** * blk_start_plug - initialize blk_plug and track it inside the task_struct * @plug: The &struct blk_plug that needs to be initialized @@ -1094,7 +1068,28 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) */ void blk_start_plug(struct blk_plug *plug) { - blk_start_plug_nr_ios(plug, 1); + struct task_struct *tsk = current; + + /* + * If this is a nested plug, don't actually assign it. + */ + if (tsk->plug) + return; + + plug->mq_list = NULL; + plug->cached_rq = NULL; + plug->nr_ios = 1; + plug->rq_count = 0; + plug->multiple_queues = false; + plug->has_elevator = false; + plug->nowait = false; + INIT_LIST_HEAD(&plug->cb_list); + + /* + * Store ordering should not be needed here, since a potential + * preempt will imply a full memory barrier + */ + tsk->plug = plug; } EXPORT_SYMBOL(blk_start_plug); diff --git a/block/blk-mq.c b/block/blk-mq.c index f14b8669ac69f..1e18ccd7d1376 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -524,7 +524,8 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q, .q = q, .flags = flags, .cmd_flags = opf, - .nr_tags = plug->nr_ios, + .nr_tags = min_t(unsigned int, plug->nr_ios, + BLK_MAX_REQUEST_COUNT), .cached_rq = &plug->cached_rq, }; struct request *rq; @@ -2867,7 +2868,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, rq_qos_throttle(q, bio); if (plug) { - data.nr_tags = plug->nr_ios; + data.nr_tags = min_t(unsigned int, plug->nr_ios, + BLK_MAX_REQUEST_COUNT); plug->nr_ios = 1; data.cached_rq = &plug->cached_rq; } diff --git 
a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 598553877fc25..6d922e7749989 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -175,6 +175,7 @@ struct io_submit_state { bool need_plug; unsigned short submit_nr; unsigned int cqes_count; + int fd; struct blk_plug plug; struct io_uring_cqe cqes[16]; }; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5434aef0a8ef7..379e41b53efde 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2209,18 +2209,25 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, return -EINVAL; if (def->needs_file) { - struct io_submit_state *state = &ctx->submit_state; - req->cqe.fd = READ_ONCE(sqe->fd); /* * Plug now if we have more than 2 IO left after this, and the * target is potentially a read/write to block based storage. */ - if (state->need_plug && def->plug) { - state->plug_started = true; - state->need_plug = false; - blk_start_plug_nr_ios(&state->plug, state->submit_nr); + if (def->plug) { + struct io_submit_state *state = &ctx->submit_state; + + if (state->need_plug) { + state->plug_started = true; + state->need_plug = false; + state->fd = req->cqe.fd; + blk_start_plug(&state->plug); + } else if (state->plug_started && + state->fd == req->cqe.fd && + !state->link.head) { + state->plug.nr_ios++; + } } } -- 2.34.1