Measurements have shown that limiting the queue depth to one for zoned
writes has a significant negative performance impact on zoned UFS
devices. Hence this patch, which disables zone locking in the
mq-deadline scheduler for storage controllers that support pipelining
zoned writes. This patch is based on the following assumptions:
- Applications submit write requests to sequential write required zones
  in order.
- The I/O priority of all pipelined write requests is the same per
  zone.
- Pipelined zoned write requests are submitted to a single hardware
  queue per zone.
- If such write requests get reordered by the software or hardware
  queue mechanism, nr_requests - 1 retries are sufficient to restore
  the order of the write requests.
- It happens infrequently that zoned write requests are reordered by
  the block layer.
- Either no I/O scheduler is used or an I/O scheduler is used that
  submits write requests per zone in LBA order.

Cc: Damien Le Moal <damien.lemoal@xxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-zoned.c   |  3 ++-
 block/mq-deadline.c | 18 ++++++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 9da8cf1bb378..63c730a18ac4 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -513,7 +513,8 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 		break;
 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
-		if (!args->seq_zones_wlock) {
+		if (!blk_queue_pipeline_zoned_writes(q) &&
+		    !args->seq_zones_wlock) {
 			args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node,
 								args->nr_zones);
 			if (!args->seq_zones_wlock)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 1a9e835e816c..aaef07a55984 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -292,7 +292,8 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		return NULL;
 
 	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -326,7 +327,8 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	if (!rq)
 		return NULL;
 
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -445,8 +447,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	}
 
 	/*
-	 * For a zoned block device, if we only have writes queued and none of
-	 * them can be dispatched, rq will be NULL.
+	 * For a zoned block device that requires write serialization, if we
+	 * only have writes queued and none of them can be dispatched, rq will
+	 * be NULL.
 	 */
 	if (!rq)
 		return NULL;
@@ -719,6 +722,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 	struct dd_per_prio *per_prio;
 	enum dd_prio prio;
+	bool pipelined_seq_write = blk_queue_pipeline_zoned_writes(q) &&
+		blk_rq_is_seq_zone_write(rq);
 	LIST_HEAD(free);
 
 	lockdep_assert_held(&dd->lock);
@@ -743,7 +748,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	trace_block_rq_insert(rq);
 
-	if (at_head) {
+	if (at_head && !pipelined_seq_write) {
 		list_add(&rq->queuelist, &per_prio->dispatch);
 		rq->fifo_time = jiffies;
 	} else {
@@ -823,7 +828,8 @@ static void dd_finish_request(struct request *rq)
 
 	atomic_inc(&per_prio->stats.completed);
 
-	if (blk_queue_is_zoned(rq->q) &&
+	if (blk_queue_is_zoned(rq->q) &&
+	    !blk_queue_pipeline_zoned_writes(q)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
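
Not part of the patch: below is a minimal sketch of how a block driver
whose controller preserves the order of pipelined writes might opt in
to this behavior. It assumes the rest of this series exposes the
blk_queue_pipeline_zoned_writes() helper used above via a queue flag;
the flag name QUEUE_FLAG_PIPELINE_ZONED_WRITES and the function name
below are hypothetical, while blk_queue_flag_set() is the existing
block layer interface for setting queue flags.

#include <linux/blkdev.h>

/*
 * Hypothetical driver-side opt-in: advertise that zoned writes may be
 * pipelined instead of being serialized by zone write locking.
 * QUEUE_FLAG_PIPELINE_ZONED_WRITES is an assumed flag name provided
 * elsewhere in this series.
 */
static void example_enable_zoned_write_pipelining(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_PIPELINE_ZONED_WRITES, q);
}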