On 4/10/23 00:46, Damien Le Moal wrote:
> Given that this change has nothing specific to zoned devices, you could
> rewrite the commit message to mention that. And are bfq and kyber OK with
> this change as well? Also, to be consistent with this change, shouldn't
> blk_mq_sched_bypass_insert() be updated as well? That function is called
> from blk_mq_sched_insert_request().
Hi Damien,

I'm considering replacing patch 02/12 of this series with the patch below:


Subject: [PATCH] block: Send flush requests to the I/O scheduler

Send flush requests to the I/O scheduler such that I/O scheduler policies
are applied to writes with the FUA flag set. Separate the I/O scheduler
members from the flush members in struct request since, with this patch
applied, a request may pass through both an I/O scheduler and the flush
machinery. This change affects the statistics of I/O schedulers that track
I/O statistics (BFQ and mq-deadline).

Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-flush.c      |  8 ++++++--
 block/blk-mq-sched.c   | 19 +++++++++++++++----
 block/blk-mq-sched.h   |  1 +
 block/blk-mq.c         | 22 +++++----------------
 include/linux/blk-mq.h | 27 +++++++++++----------------
 5 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 53202eff545e..cf0afb75fafd 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -336,8 +336,11 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
 		 * account of this driver tag
 		 */
 		flush_rq->rq_flags |= RQF_MQ_INFLIGHT;
-	} else
+	} else {
 		flush_rq->internal_tag = first_rq->internal_tag;
+		flush_rq->rq_flags |= RQF_ELV;
+		blk_mq_sched_prepare(flush_rq);
+	}
 
 	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
 	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
@@ -432,7 +435,8 @@ void blk_insert_flush(struct request *rq)
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
-		blk_mq_request_bypass_insert(rq, false, true);
+		blk_mq_sched_insert_request(rq, /*at_head=*/false,
+					    /*run_queue=*/true, /*async=*/true);
 		return;
 	}
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 62c8c1ba1321..9938c35aa7ed 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -18,6 +18,20 @@
 #include "blk-mq-tag.h"
 #include "blk-wbt.h"
 
+/* Prepare a request for insertion into an I/O scheduler. */
+void blk_mq_sched_prepare(struct request *rq)
+{
+	struct elevator_queue *e = rq->q->elevator;
+
+	INIT_HLIST_NODE(&rq->hash);
+	RB_CLEAR_NODE(&rq->rb_node);
+
+	if (e->type->ops.prepare_request) {
+		e->type->ops.prepare_request(rq);
+		rq->rq_flags |= RQF_ELVPRIV;
+	}
+}
+
 /*
  * Mark a hardware queue as needing a restart.
  */
@@ -397,10 +411,7 @@ static bool blk_mq_sched_bypass_insert(struct request *rq)
 	 * passthrough request is added to scheduler queue, there isn't any
 	 * chance to dispatch it given we prioritize requests in hctx->dispatch.
 	 */
-	if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
-		return true;
-
-	return false;
+	return req_op(rq) == REQ_OP_FLUSH || blk_rq_is_passthrough(rq);
 }
 
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 025013972453..6337c5a66af6 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -8,6 +8,7 @@
 
 #define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)
 
+void blk_mq_sched_prepare(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs, struct request **merged_request);
 bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index db93b1a71157..2731466b1952 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,18 +384,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	WRITE_ONCE(rq->deadline, 0);
 	req_ref_set(rq, 1);
 
-	if (rq->rq_flags & RQF_ELV) {
-		struct elevator_queue *e = data->q->elevator;
-
-		INIT_HLIST_NODE(&rq->hash);
-		RB_CLEAR_NODE(&rq->rb_node);
-
-		if (!op_is_flush(data->cmd_flags) &&
-		    e->type->ops.prepare_request) {
-			e->type->ops.prepare_request(rq);
-			rq->rq_flags |= RQF_ELVPRIV;
-		}
-	}
+	if (rq->rq_flags & RQF_ELV)
+		blk_mq_sched_prepare(rq);
 
 	return rq;
 }
@@ -452,13 +442,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 		data->rq_flags |= RQF_ELV;
 
 		/*
-		 * Flush/passthrough requests are special and go directly to the
-		 * dispatch list. Don't include reserved tags in the
-		 * limiting, as it isn't useful.
+		 * Do not limit the depth for passthrough requests nor for
+		 * requests with a reserved tag.
 		 */
-		if (!op_is_flush(data->cmd_flags) &&
+		if (e->type->ops.limit_depth &&
 		    !blk_op_is_passthrough(data->cmd_flags) &&
-		    e->type->ops.limit_depth &&
 		    !(data->flags & BLK_MQ_REQ_RESERVED))
 			e->type->ops.limit_depth(data->cmd_flags, data);
 	}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 06caacd77ed6..5e6c79ad83d2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -169,25 +169,20 @@ struct request {
 		void *completion_data;
 	};
 
-
 	/*
 	 * Three pointers are available for the IO schedulers, if they need
-	 * more they have to dynamically allocate it. Flush requests are
-	 * never put on the IO scheduler. So let the flush fields share
-	 * space with the elevator data.
+	 * more they have to dynamically allocate it.
 	 */
-	union {
-		struct {
-			struct io_cq		*icq;
-			void			*priv[2];
-		} elv;
-
-		struct {
-			unsigned int		seq;
-			struct list_head	list;
-			rq_end_io_fn		*saved_end_io;
-		} flush;
-	};
+	struct {
+		struct io_cq		*icq;
+		void			*priv[2];
+	} elv;
+
+	struct {
+		unsigned int		seq;
+		struct list_head	list;
+		rq_end_io_fn		*saved_end_io;
+	} flush;
 
 	union {
 		struct __call_single_data csd;
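
As a side note for anyone who wants to exercise this path: one way to generate
a write with REQ_FUA set from user space is an O_DIRECT + O_DSYNC write to a
block device, since the bdev direct-I/O path submits IOCB_DSYNC writes with
REQ_FUA on current kernels. Below is a minimal, hypothetical test sketch (not
part of the patch above; the /dev/nullb0 path and the 4 KiB size are
placeholders). Whether such a write then goes through the flush machinery still
depends on the queue's write cache and FUA settings (see blk_flush_policy()).

/*
 * Hypothetical test program (not part of the patch): generate a FUA-flagged
 * write by combining O_DIRECT and O_DSYNC on a block device. The default
 * device path (null_blk test device) and the 4 KiB I/O size are placeholders.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "/dev/nullb0";
	const size_t len = 4096;	/* must be a multiple of the logical block size */
	void *buf;
	int fd;

	fd = open(dev, O_WRONLY | O_DIRECT | O_DSYNC);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* O_DIRECT requires an aligned buffer. */
	if (posix_memalign(&buf, len, len)) {
		perror("posix_memalign");
		return 1;
	}
	memset(buf, 0x5a, len);
	/* With O_DSYNC, this direct write to the block device is submitted with REQ_FUA. */
	if (pwrite(fd, buf, len, 0) != (ssize_t)len)
		perror("pwrite");
	free(buf);
	close(fd);
	return 0;
}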