Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
---
v2->v3
The previous version attempted to split the setup and submit steps
further, but it was requested to go back to this simpler version.
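
For reference, the net effect of the batching can be modelled in plain
userspace C roughly as below. This is only a simplified sketch of the idea
(grow the plug's nr_ios for back-to-back requests on the same fd, and let
blk-mq clamp the tag allocation); the names plug_model, submit_one and
plug_nr_tags are made up for illustration, and the BLK_MAX_REQUEST_COUNT
value is assumed here rather than taken from the tree.

#include <stdio.h>

#define BLK_MAX_REQUEST_COUNT 32	/* assumed; the real limit lives in blk-mq */

struct plug_model {
	int started;			/* plug has been started */
	int fd;				/* fd the plug was started for */
	unsigned int nr_ios;		/* expected requests, grown per same-fd sqe */
};

/* blk-mq side: clamp the batch when allocating cached requests */
static unsigned int plug_nr_tags(const struct plug_model *plug)
{
	return plug->nr_ios < BLK_MAX_REQUEST_COUNT ?
	       plug->nr_ios : BLK_MAX_REQUEST_COUNT;
}

/* io_uring side: one call per sqe that targets block storage */
static void submit_one(struct plug_model *plug, int fd)
{
	if (!plug->started) {
		plug->started = 1;	/* blk_start_plug(): nr_ios starts at 1 */
		plug->fd = fd;
		plug->nr_ios = 1;
	} else if (plug->fd == fd) {
		plug->nr_ios++;		/* same target: expect one more request */
	}
}

int main(void)
{
	struct plug_model plug = { 0 };
	int fds[] = { 3, 3, 3, 4, 3 };

	for (size_t i = 0; i < sizeof(fds) / sizeof(fds[0]); i++)
		submit_one(&plug, fds[i]);

	printf("nr_ios=%u nr_tags=%u\n", plug.nr_ios, plug_nr_tags(&plug));
	return 0;
}

With the example fds above this prints nr_ios=4 nr_tags=4: the interleaved
fd 4 does not grow the batch, mirroring the state->fd check added to
io_init_req().
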
block/blk-core.c | 49 +++++++++++++++-------------------
block/blk-mq.c | 6 +++--
include/linux/blkdev.h | 6 -----
include/linux/io_uring_types.h | 1 +
io_uring/io_uring.c | 37 ++++++++++++++++++-------
5 files changed, 54 insertions(+), 45 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 99d8b9812b18f..b8f8aa1376e60 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1043,32 +1043,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
}
EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
-void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
-{
- struct task_struct *tsk = current;
-
- /*
- * If this is a nested plug, don't actually assign it.
- */
- if (tsk->plug)
- return;
-
- plug->mq_list = NULL;
- plug->cached_rq = NULL;
- plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
- plug->rq_count = 0;
- plug->multiple_queues = false;
- plug->has_elevator = false;
- plug->nowait = false;
- INIT_LIST_HEAD(&plug->cb_list);
-
- /*
- * Store ordering should not be needed here, since a potential
- * preempt will imply a full memory barrier
- */
- tsk->plug = plug;
-}
-
/**
* blk_start_plug - initialize blk_plug and track it inside the task_struct
* @plug: The &struct blk_plug that needs to be initialized
@@ -1094,7 +1068,28 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
*/
void blk_start_plug(struct blk_plug *plug)
{
- blk_start_plug_nr_ios(plug, 1);
+ struct task_struct *tsk = current;
+
+ /*
+ * If this is a nested plug, don't actually assign it.
+ */
+ if (tsk->plug)
+ return;
+
+ plug->mq_list = NULL;
+ plug->cached_rq = NULL;
+ plug->nr_ios = 1;
+ plug->rq_count = 0;
+ plug->multiple_queues = false;
+ plug->has_elevator = false;
+ plug->nowait = false;
+ INIT_LIST_HEAD(&plug->cb_list);
+
+ /*
+ * Store ordering should not be needed here, since a potential
+ * preempt will imply a full memory barrier
+ */
+ tsk->plug = plug;
}
EXPORT_SYMBOL(blk_start_plug);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d50b1d62a3d92..fc75fb9ef34ed 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -523,7 +523,8 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
.q = q,
.flags = flags,
.cmd_flags = opf,
- .nr_tags = plug->nr_ios,
+ .nr_tags = min_t(unsigned int, plug->nr_ios,
+ BLK_MAX_REQUEST_COUNT),
.cached_rq = &plug->cached_rq,
};
struct request *rq;
@@ -2859,7 +2860,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
rq_qos_throttle(q, bio);
if (plug) {
- data.nr_tags = plug->nr_ios;
+ data.nr_tags = min_t(unsigned int, plug->nr_ios,
+ BLK_MAX_REQUEST_COUNT);
plug->nr_ios = 1;
data.cached_rq = &plug->cached_rq;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed44a997f629f..a2a022957cd96 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -984,7 +984,6 @@ struct blk_plug_cb {
extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
void *data, int size);
extern void blk_start_plug(struct blk_plug *);
-extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
extern void blk_finish_plug(struct blk_plug *);
void __blk_flush_plug(struct blk_plug *plug, bool from_schedule);
@@ -1000,11 +999,6 @@ long nr_blockdev_pages(void);
struct blk_plug {
};
-static inline void blk_start_plug_nr_ios(struct blk_plug *plug,
- unsigned short nr_ios)
-{
-}
-
static inline void blk_start_plug(struct blk_plug *plug)
{
}
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index f04ce513fadba..109d4530bccbf 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -175,6 +175,7 @@ struct io_submit_state {
bool need_plug;
unsigned short submit_nr;
unsigned int cqes_count;
+ int fd;
struct blk_plug plug;
struct io_uring_cqe cqes[16];
};
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 135da2fd0edab..36f45d234fe49 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2195,18 +2195,25 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
return -EINVAL;
if (def->needs_file) {
- struct io_submit_state *state = &ctx->submit_state;
-
req->cqe.fd = READ_ONCE(sqe->fd);
/*
* Plug now if we have more than 2 IO left after this, and the
* target is potentially a read/write to block based storage.
*/
- if (state->need_plug && def->plug) {
- state->plug_started = true;
- state->need_plug = false;
- blk_start_plug_nr_ios(&state->plug, state->submit_nr);
+ if (def->plug) {
+ struct io_submit_state *state = &ctx->submit_state;
+
+ if (state->need_plug) {
+ state->plug_started = true;
+ state->need_plug = false;
+ state->fd = req->cqe.fd;
+ blk_start_plug(&state->plug);
+ } else if (state->plug_started &&
+ state->fd == req->cqe.fd &&
+ !state->link.head) {
+ state->plug.nr_ios++;
+ }
}
}
@@ -2267,7 +2274,8 @@ static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
}
static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+ const struct io_uring_sqe *sqe,
+ struct io_wq_work_list *req_list)
__must_hold(&ctx->uring_lock)
{
struct io_submit_link *link = &ctx->submit_state.link;
@@ -2315,7 +2323,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
return 0;
}
- io_queue_sqe(req);
+ wq_list_add_tail(&req->comp_list, req_list);
return 0;
}
@@ -2400,6 +2408,8 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
__must_hold(&ctx->uring_lock)
{
unsigned int entries = io_sqring_entries(ctx);
+ struct io_wq_work_list req_list;
+ struct io_kiocb *req;
unsigned int left;
int ret;
@@ -2410,9 +2420,9 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
io_get_task_refs(left);
io_submit_state_start(&ctx->submit_state, left);
+ INIT_WQ_LIST(&req_list);
do {
const struct io_uring_sqe *sqe;
- struct io_kiocb *req;
if (unlikely(!io_alloc_req(ctx, &req)))
break;
@@ -2425,13 +2435,20 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
* Continue submitting even for sqe failure if the
* ring was setup with IORING_SETUP_SUBMIT_ALL
*/
- if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
+ if (unlikely(io_submit_sqe(ctx, req, sqe, &req_list)) &&
!(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
left--;
break;
}
} while (--left);
+ while (req_list.first) {
+ req = container_of(req_list.first, struct io_kiocb, comp_list);
+ req_list.first = req->comp_list.next;
+ req->comp_list.next = NULL;
+ io_queue_sqe(req);
+ }
+
if (unlikely(left)) {
ret -= left;
/* try again if it submitted nothing and can't allocate a req */