[PATCH 5/5] block/mq-deadline: Remove zone locking

Bart Van Assche <bvanassche@xxxxxxx> · Tue, 14 Jun 2022 10:49:43 -0700

Measurements have shown that limiting the queue depth to one has a
significant negative performance impact on zoned UFS devices. Hence this
patch that removes zone locking from the mq-deadline scheduler. This
patch is based on the following assumptions:
- Applications submit write requests to sequential write required zones
  in order.
- If such write requests get reordered by the software or hardware queue
  mechanism, nr_hw_queues * nr_requests - 1 retries are sufficient to
  reorder the write requests.
- It happens infrequently that zoned write requests are reordered by the
  block layer.
- Either no I/O scheduler is used or an I/O scheduler is used that
  submits write requests per zone in LBA order.

DD_BE_PRIO is selected for sequential writes to preserve the LBA order.

See also commit 5700f69178e9 ("mq-deadline: Introduce zone locking
support").

Cc: Damien Le Moal <damien.lemoal@xxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/mq-deadline.c | 74 ++++-----------------------------------------
 1 file changed, 6 insertions(+), 68 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 6ed602b2f80a..e168fc9a980a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -104,7 +104,6 @@ struct deadline_data {
 	int prio_aging_expire;
 
 	spinlock_t lock;
-	spinlock_t zone_lock;
 };
 
 /* Maps an I/O priority class to a deadline scheduler priority. */
@@ -285,30 +284,10 @@ static struct request *
 deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq;
-	unsigned long flags;
-
 	if (list_empty(&per_prio->fifo_list[data_dir]))
 		return NULL;
 
-	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
-		if (blk_req_can_dispatch_to_zone(rq))
-			goto out;
-	}
-	rq = NULL;
-out:
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 }
 
 /*
@@ -319,29 +298,7 @@ static struct request *
 deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq;
-	unsigned long flags;
-
-	rq = per_prio->next_rq[data_dir];
-	if (!rq)
-		return NULL;
-
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	while (rq) {
-		if (blk_req_can_dispatch_to_zone(rq))
-			break;
-		rq = deadline_latter_request(rq);
-	}
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return per_prio->next_rq[data_dir];
 }
 
 /*
@@ -467,10 +424,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	ioprio_class = dd_rq_ioclass(rq);
 	prio = ioprio_class_to_prio[ioprio_class];
 	dd->per_prio[prio].stats.dispatched++;
-	/*
-	 * If the request needs its target zone locked, do it.
-	 */
-	blk_req_zone_write_lock(rq);
 	rq->rq_flags |= RQF_STARTED;
 	return rq;
 }
@@ -640,7 +593,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	dd->fifo_batch = fifo_batch;
 	dd->prio_aging_expire = prio_aging_expire;
 	spin_lock_init(&dd->lock);
-	spin_lock_init(&dd->zone_lock);
 
 	q->elevator = eq;
 	return 0;
@@ -716,17 +668,13 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 	struct dd_per_prio *per_prio;
 	enum dd_prio prio;
+	bool seq_write = blk_rq_is_seq_write(rq);
 	LIST_HEAD(free);
 
 	lockdep_assert_held(&dd->lock);
 
-	/*
-	 * This may be a requeue of a write request that has locked its
-	 * target zone. If it is the case, this releases the zone lock.
-	 */
-	blk_req_zone_write_unlock(rq);
-
-	prio = ioprio_class_to_prio[ioprio_class];
+	prio = seq_write ? DD_BE_PRIO :
+		ioprio_class_to_prio[ioprio_class];
 	per_prio = &dd->per_prio[prio];
 	if (!rq->elv.priv[0]) {
 		per_prio->stats.inserted++;
@@ -740,7 +688,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	trace_block_rq_insert(rq);
 
-	if (at_head) {
+	if (at_head && !seq_write) {
 		list_add(&rq->queuelist, &per_prio->dispatch);
 		rq->fifo_time = jiffies;
 	} else {
@@ -819,16 +767,6 @@ static void dd_finish_request(struct request *rq)
 		return;
 
 	atomic_inc(&per_prio->stats.completed);
-
-	if (blk_queue_is_zoned(q)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->zone_lock, flags);
-		blk_req_zone_write_unlock(rq);
-		if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
-			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
-		spin_unlock_irqrestore(&dd->zone_lock, flags);
-	}
 }
 
 static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)