On 5/23/23 00:18, Christoph Hellwig wrote:
On Mon, May 22, 2023 at 11:38:37AM -0700, Bart Van Assche wrote:
Send requeued requests to the I/O scheduler such that the I/O scheduler
can control the order in which requests are dispatched.
This patch reworks commit aef1897cd36d ("blk-mq: insert rq with DONTPREP
to hctx dispatch list when requeue").
But you need to explain why it is safe. I think it is because you add
DONTPREP to RQF_NOMERGE_FLAGS, but that really belongs in the commit
log.
Hi Christoph,
I will add the above explanation to the commit message.
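For reference, the change Christoph refers to looks roughly like this (a
sketch; the existing members of RQF_NOMERGE_FLAGS are elided). Adding
RQF_DONTPREP to that set guarantees that a requeued request that has
already been prepared by the driver is never merged with another request:

/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
	(RQF_STARTED | /* ...existing flags... */ RQF_DONTPREP)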
+	list_for_each_entry_safe(rq, next, &requeue_list, queuelist) {
+		if (!(rq->rq_flags & RQF_DONTPREP)) {
+			list_del_init(&rq->queuelist);
+			blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
+		}
+	}
+	while (!list_empty(&requeue_list)) {
+		rq = list_entry(requeue_list.next, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_insert_request(rq, 0);
So now both started and unstarted requests go through
blk_mq_insert_request, and thus potentially the I/O scheduler, but
RQF_DONTPREP requests that are by definition further along in processing
are inserted at the tail, while !RQF_DONTPREP ones get inserted at the
head. This feels wrong, and we probably need to sort out why and how we
are doing head insertion on requeue first.
The use cases for requeuing requests are as follows:
* Resubmitting a partially completed request (ATA, SCSI, loop, ...).
* Handling connection recovery (nbd, ublk, ...).
* Simulating the "device busy" condition (null_blk).
* Handling the queue full condition (virtio-blk).
* Handling path errors by dm-mpath (DM_ENDIO_REQUEUE).
* Handling retryable I/O errors (mmc, NVMe).
* Handling unit attentions (SCSI).
* Unplugging a CPU.
Do you perhaps want me to select BLK_MQ_INSERT_AT_HEAD for all requeued requests?
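With head insertion for everything, the requeue loop would collapse to
something like this (a sketch derived from the code quoted above):

	while (!list_empty(&requeue_list)) {
		rq = list_entry(requeue_list.next, struct request, queuelist);
		list_del_init(&rq->queuelist);
		/* Head insertion for all requeued requests, with or
		 * without RQF_DONTPREP. */
		blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
	}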
+	if (q->elevator) {
+		if (q->elevator->type->ops.requeue_request)
+			list_for_each_entry(rq, &for_sched, queuelist)
+				q->elevator->type->ops.requeue_request(rq);
+		q->elevator->type->ops.insert_requests(hctx, &for_sched,
+						       BLK_MQ_INSERT_AT_HEAD);
+	}
+
None of this is explained in the commit log, please elaborate on the
rationale for it. Also:
- this code block really belongs in a helper
- calling ->requeue_request in a loop before calling ->insert_requests
  feels wrong. A BLK_MQ_INSERT_REQUEUE flag feels like the better
  interface here.
Pushing a request back to hctx->dispatch is a requeuing mechanism. I want
to send requeued requests to the I/O scheduler.
Regarding the BLK_MQ_INSERT_REQUEUE suggestion, how about adding the patch
below near the start of this patch series?
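In short, that patch replaces the ->requeue_request() callback with an
RQF_REQUEUED flag: requeuing only marks the request, and the scheduler
undoes its earlier ->prepare_request() work from inside
->insert_requests(). A sketch, with names taken from the patch below:

	/* blk_mq_sched_requeue_request(): only mark the request. */
	if (rq->rq_flags & RQF_USE_SCHED)
		rq->rq_flags |= RQF_REQUEUED;

	/* In the scheduler's ->insert_requests() implementation: */
	if (rq->rq_flags & RQF_REQUEUED) {
		rq->rq_flags &= ~RQF_REQUEUED;
		bfq_finish_requeue_request(rq); /* kyber_finish_request() for Kyber */
	}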
Thanks,
Bart.
block: Remove elevator_type.requeue_request()
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 117aec1791c0..319d3c5a0f85 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6232,6 +6232,7 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
 #endif /* CONFIG_BFQ_CGROUP_DEBUG */
 
 static struct bfq_queue *bfq_init_rq(struct request *rq);
+static void bfq_finish_requeue_request(struct request *rq);
 
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       blk_insert_t flags)
@@ -6243,6 +6244,11 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	blk_opf_t cmd_flags;
 	LIST_HEAD(free);
 
+	if (rq->rq_flags & RQF_REQUEUED) {
+		rq->rq_flags &= ~RQF_REQUEUED;
+		bfq_finish_requeue_request(rq);
+	}
+
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
 		bfqg_stats_update_legacy_io(q, rq);
@@ -7596,7 +7602,6 @@ static struct elevator_type iosched_bfq_mq = {
 	.ops = {
 		.limit_depth = bfq_limit_depth,
 		.prepare_request = bfq_prepare_request,
-		.requeue_request = bfq_finish_requeue_request,
 		.finish_request = bfq_finish_request,
 		.exit_icq = bfq_exit_icq,
 		.insert_requests = bfq_insert_requests,
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 1326526bb733..8d4b835539b1 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -58,13 +58,8 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 
 static inline void blk_mq_sched_requeue_request(struct request *rq)
 {
-	if (rq->rq_flags & RQF_USE_SCHED) {
-		struct request_queue *q = rq->q;
-		struct elevator_queue *e = q->elevator;
-
-		if (e->type->ops.requeue_request)
-			e->type->ops.requeue_request(rq);
-	}
+	if (rq->rq_flags & RQF_USE_SCHED)
+		rq->rq_flags |= RQF_REQUEUED;
 }
 
 static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
diff --git a/block/elevator.h b/block/elevator.h
index 7ca3d7b6ed82..c1f459eebc9f 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -43,7 +43,6 @@ struct elevator_mq_ops {
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
 	void (*completed_request)(struct request *, u64);
-	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
 	void (*init_icq)(struct io_cq *);
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index f495be433276..b1f2e172fe61 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -608,6 +608,10 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
 
 		spin_lock(&kcq->lock);
 		trace_block_rq_insert(rq);
+		if (rq->rq_flags & RQF_REQUEUED) {
+			rq->rq_flags &= ~RQF_REQUEUED;
+			kyber_finish_request(rq);
+		}
 		if (flags & BLK_MQ_INSERT_AT_HEAD)
 			list_move(&rq->queuelist, head);
 		else
@@ -1022,7 +1026,6 @@ static struct elevator_type kyber_sched = {
 		.prepare_request = kyber_prepare_request,
 		.insert_requests = kyber_insert_requests,
 		.finish_request = kyber_finish_request,
-		.requeue_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,
 		.dispatch_request = kyber_dispatch_request,
 		.has_work = kyber_has_work,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d778cb6b2112..861ca3bb0bce 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -59,6 +59,9 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
 /* ->timeout has been called, don't expire again */
 #define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
+/* The request is about to be requeued. */
+#define RQF_REQUEUED		((__force req_flags_t)(1 << 22))
+/* A reserved tag has been assigned to the request. */
 #define RQF_RESV		((__force req_flags_t)(1 << 23))
 
 /* flags that prevent us from merging requests: */