When a LLDD can't dispatch a request to a specific zone, it will return BLK_STS_ZONE_RESOURCE indicating this request needs to be delayed, e.g. because the zone it will be dispatched to is still write-locked. If this happens set the request aside in a local list to continue trying dispatching requests such as READ requests or a WRITE/ZONE_APPEND requests targetting other zones. This way we can still keep a high queue depth without starving other requests even if one request can't be served due to zone write-locking. All requests put aside in the local list due to BLK_STS_ZONE_RESOURCE are placed back at the head of the dispatch list for retrying the next time the device queues are run again. Signed-off-by: Johannes Thumshirn <johannes.thumshirn@xxxxxxx> --- block/blk-mq.c | 27 +++++++++++++++++++++++++++ drivers/scsi/scsi_lib.c | 1 + 2 files changed, 28 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index f7ab75ef4d0e..89eb062825a7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1195,6 +1195,19 @@ static void blk_mq_handle_dev_resource(struct request *rq, __blk_mq_requeue_request(rq); } +static void blk_mq_handle_zone_resource(struct request *rq, + struct list_head *zone_list) +{ + /* + * If we end up here it is because we cannot dispatch a request to a + * specific zone due to LLD level zone-write locking or other zone + * related resource not being available. In this case, set the request + * aside in zone_list for retrying it later. + */ + list_add(&rq->queuelist, zone_list); + __blk_mq_requeue_request(rq); +} + /* * Returns true if we did some work AND can potentially do more. */ @@ -1206,6 +1219,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, bool no_tag = false; int errors, queued; blk_status_t ret = BLK_STS_OK; + LIST_HEAD(zone_list); if (list_empty(list)) return false; @@ -1264,6 +1278,16 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { blk_mq_handle_dev_resource(rq, list); break; + } else if (ret == BLK_STS_ZONE_RESOURCE) { + /* + * Move the request to zone_list and keep going through + * the dipatch list to find more requests the drive + * accepts. + */ + blk_mq_handle_zone_resource(rq, &zone_list); + if (list_empty(list)) + break; + continue; } if (unlikely(ret != BLK_STS_OK)) { @@ -1275,6 +1299,9 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, queued++; } while (!list_empty(list)); + if (!list_empty(&zone_list)) + list_splice_tail_init(&zone_list, list); + hctx->dispatched[queued_to_index(queued)]++; /* diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 610ee41fa54c..ea327f320b7f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1706,6 +1706,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, case BLK_STS_OK: break; case BLK_STS_RESOURCE: + case BLK_STS_ZONE_RESOURCE: if (atomic_read(&sdev->device_busy) || scsi_device_blocked(sdev)) ret = BLK_STS_DEV_RESOURCE; -- 2.24.1