Zoned devices require sequential writes within the same zone: when two
requests target the same zone, the request at the lower position must
be dispatched to the device first. However, since each priority class
has its own tree & list, a request with a higher priority class is
dispatched first. So if request A and request B target the same zone,
where request A is BE class at position X+0 and request B is RT class
at position X+1, request B is dispatched before request A, and the
zoned device returns an error.

This was found in practice when using F2FS on a zoned device, and it
is very easy to reproduce:
1. Use fsstress to run 8 test processes
2. Use ionice to change 4 of the 8 processes to RT priority

Fixes: c807ab520fc3 ("block/mq-deadline: Add I/O priority support")
Cc: <stable@xxxxxxxxxxxxxxx>
Signed-off-by: Wu Bo <bo.wu@xxxxxxxx>
---
 block/mq-deadline.c    | 31 +++++++++++++++++++++++++++++++
 include/linux/blk-mq.h | 15 +++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 02a916ba62ee..6a05dd86e8ca 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -539,6 +539,37 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	if (started_after(dd, rq, latest_start))
 		return NULL;
 
+	if (!blk_rq_is_seq_zoned_write(rq))
+		goto skip_check;
+	/*
+	 * To ensure sequential writing, check the lower priority classes to
+	 * see if there is a request on the same zone that needs to be
+	 * dispatched first.
+	 */
+	ioprio_class = dd_rq_ioclass(rq);
+	prio = ioprio_class_to_prio[ioprio_class];
+	prio++;
+	for (; prio <= DD_PRIO_MAX; prio++) {
+		struct request *temp_rq;
+		unsigned long flags;
+		bool can_dispatch;
+
+		if (!dd_queued(dd, prio))
+			continue;
+
+		temp_rq = deadline_from_pos(&dd->per_prio[prio], data_dir, blk_rq_pos(rq));
+		if (temp_rq && blk_req_zone_in_one(temp_rq, rq) &&
+		    blk_rq_pos(temp_rq) < blk_rq_pos(rq)) {
+			spin_lock_irqsave(&dd->zone_lock, flags);
+			can_dispatch = blk_req_can_dispatch_to_zone(temp_rq);
+			spin_unlock_irqrestore(&dd->zone_lock, flags);
+			if (!can_dispatch)
+				return NULL;
+			rq = temp_rq;
+			per_prio = &dd->per_prio[prio];
+		}
+	}
+skip_check:
 	/*
 	 * rq is the selected appropriate request.
 	 */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d3d8fd8e229b..bca1e639e0f3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1202,6 +1202,15 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 		return true;
 	return !blk_req_zone_is_write_locked(rq);
 }
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+				       struct request *rq_b)
+{
+	unsigned int zone_sectors = rq_a->q->limits.chunk_sectors;
+
+	return round_down(blk_rq_pos(rq_a), zone_sectors) ==
+	       round_down(blk_rq_pos(rq_b), zone_sectors);
+}
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
 {
@@ -1229,6 +1238,12 @@ static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
 {
 	return true;
 }
+
+static inline bool blk_req_zone_in_one(struct request *rq_a,
+				       struct request *rq_b)
+{
+	return false;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 #endif /* BLK_MQ_H */
-- 
2.35.3
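
A minimal user-space sketch of the zone-containment arithmetic that
blk_req_zone_in_one() performs in the patch above, for anyone who wants
to check the round_down() logic outside the kernel. This is not kernel
code: the zone size is an arbitrary example value standing in for
q->limits.chunk_sectors, and round_down_pow2() mimics the kernel's
round_down() for power-of-two alignments.

#include <stdbool.h>
#include <stdio.h>

/* Same semantics as the kernel's round_down() for power-of-two sizes. */
static unsigned long round_down_pow2(unsigned long x, unsigned long align)
{
	return x & ~(align - 1);
}

/* Two sector positions fall in the same zone iff they share a zone start. */
static bool same_zone(unsigned long pos_a, unsigned long pos_b,
		      unsigned long zone_sectors)
{
	return round_down_pow2(pos_a, zone_sectors) ==
	       round_down_pow2(pos_b, zone_sectors);
}

int main(void)
{
	const unsigned long zone_sectors = 524288; /* example: 256 MiB zones */
	const unsigned long x = 3 * zone_sectors;  /* zone start "X" */

	/*
	 * The BE request at X+0 and the RT request at X+1 share a zone,
	 * so the X+0 request must reach the device first.
	 */
	printf("same zone: %d\n", same_zone(x + 0, x + 1, zone_sectors));
	printf("different zones: %d\n",
	       same_zone(x, x + zone_sectors, zone_sectors));
	return 0;
}

Built with a plain "cc sketch.c", this prints "same zone: 1" and
"different zones: 0": two positions share a zone exactly when they round
down to the same zone start, which is the property the new helper and
the lower-priority-class scan rely on.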