Zoned devices require sequential writes within the same zone: when two
requests target the same zone, the request at the lower position must
be dispatched to the device first. However, since each priority class
has its own tree & list, a request with a higher priority class is
dispatched first. So if request A and request B target the same zone,
where request A is BE class at position X+0 and request B is RT class
at position X+1, request B is dispatched before request A, and the
zoned device returns an error.

This was found in practice when using F2FS on a zoned device, and it
is very easy to reproduce:
1. Use fsstress to run 8 test processes
2. Use ionice to change 4 of the 8 processes to RT priority

Fixes: c807ab520fc3 ("block/mq-deadline: Add I/O priority support")
Cc: <stable@xxxxxxxxxxxxxxx>
Signed-off-by: Wu Bo <bo.wu@xxxxxxxx>
---
 block/mq-deadline.c    | 31 +++++++++++++++++++++++++++++++
 include/linux/blk-mq.h | 15 +++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 02a916ba62ee..6a05dd86e8ca 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -539,6 +539,37 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	if (started_after(dd, rq, latest_start))
 		return NULL;
 
+	if (!blk_rq_is_seq_zoned_write(rq))
+		goto skip_check;
+	/*
+	 * To ensure sequential writing, check the lower priority classes to
+	 * see if there is a request on the same zone that needs to be
+	 * dispatched first.
+	 */
+	ioprio_class = dd_rq_ioclass(rq);
+	prio = ioprio_class_to_prio[ioprio_class];
+	prio++;
+	for (; prio <= DD_PRIO_MAX; prio++) {
+		struct request *temp_rq;
+		unsigned long flags;
+		bool can_dispatch;
+
+		if (!dd_queued(dd, prio))
+			continue;
+
+		temp_rq = deadline_from_pos(&dd->per_prio[prio], data_dir, blk_rq_pos(rq));
+		if (temp_rq && blk_req_zone_in_one(temp_rq, rq) &&
+		    blk_rq_pos(temp_rq) < blk_rq_pos(rq)) {
+			spin_lock_irqsave(&dd->zone_lock, flags);
+			can_dispatch = blk_req_can_dispatch_to_zone(temp_rq);
+			spin_unlock_irqrestore(&dd->zone_lock, flags);
+			if (!can_dispatch)
+				return NULL;
+			rq = temp_rq;
+			per_prio = &dd->per_prio[prio];
+		}
+	}
+skip_check:
 	/*
 	 * rq is the selected appropriate request.
 	 */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d3d8fd8e229b..bca1e639e0f3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1202,6 +1202,15 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 		return true;
 	return !blk_req_zone_is_write_locked(rq);
 }
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+				       struct request *rq_b)
+{
+	unsigned int zone_sectors = rq_a->q->limits.chunk_sectors;
+
+	return round_down(blk_rq_pos(rq_a), zone_sectors) ==
+	       round_down(blk_rq_pos(rq_b), zone_sectors);
+}
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
 {
@@ -1229,6 +1238,12 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 {
 	return true;
 }
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+				       struct request *rq_b)
+{
+	return false;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 #endif /* BLK_MQ_H */
-- 
2.35.3
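
A minimal user-space sketch of the zone-containment arithmetic that
blk_req_zone_in_one() performs in the patch above, for anyone who wants
to check the round_down() logic outside the kernel. This is not kernel
code: the zone size is an arbitrary example value standing in for
q->limits.chunk_sectors, and round_down_pow2() mimics the kernel's
round_down() for power-of-two alignments.

#include <stdbool.h>
#include <stdio.h>

/* Same semantics as the kernel's round_down() for power-of-two sizes. */
static unsigned long round_down_pow2(unsigned long x, unsigned long align)
{
	return x & ~(align - 1);
}

/* Two sector positions fall in the same zone iff they share a zone start. */
static bool same_zone(unsigned long pos_a, unsigned long pos_b,
		      unsigned long zone_sectors)
{
	return round_down_pow2(pos_a, zone_sectors) ==
	       round_down_pow2(pos_b, zone_sectors);
}

int main(void)
{
	const unsigned long zone_sectors = 524288; /* example: 256 MiB zones */
	const unsigned long x = 3 * zone_sectors;  /* zone start "X" */

	/*
	 * The BE request at X+0 and the RT request at X+1 share a zone,
	 * so the X+0 request must reach the device first.
	 */
	printf("same zone: %d\n", same_zone(x + 0, x + 1, zone_sectors));
	printf("different zones: %d\n",
	       same_zone(x, x + zone_sectors, zone_sectors));
	return 0;
}

Built with a plain "cc sketch.c", this prints "same zone: 1" and
"different zones: 0": two positions share a zone exactly when they round
down to the same zone start, which is the property the new helper and
the lower-priority-class scan rely on.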