Measurements have shown that limiting the queue depth to one for zoned
writes has a significant negative performance impact on zoned UFS
devices. Hence this patch, which disables zone locking in the
mq-deadline scheduler for storage controllers that support pipelining
zoned writes. This patch is based on the following assumptions:
- Applications submit write requests to sequential write required zones
  in order.
- Zoned write requests are only infrequently reordered by the block
  layer.
- The storage controller does not reorder write requests that have been
  submitted to the same hardware queue. This is the case for UFS: the
  UFSHCI specification requires that UFS controllers process requests
  in order per hardware queue.
- The I/O priority of all pipelined write requests is the same per
  zone.
- Either no I/O scheduler is used or an I/O scheduler is used that
  submits write requests per zone in LBA order.

If applications submit write requests to sequential write required
zones in order, at least one of the pending requests (the one whose LBA
matches the zone write pointer) will succeed. Hence, the number of
retries that is required per request is at most (number of pending
requests) - 1.

Cc: Damien Le Moal <damien.lemoal@xxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-zoned.c   |  3 ++-
 block/mq-deadline.c | 14 +++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index db829401d8d0..158638091e39 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -520,7 +520,8 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 		break;
 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
-		if (!args->seq_zones_wlock) {
+		if (!blk_queue_pipeline_zoned_writes(q) &&
+		    !args->seq_zones_wlock) {
 			args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node,
 								args->nr_zones);
 			if (!args->seq_zones_wlock)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index f10c2a0d18d4..e41808c0b007 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -339,7 +339,8 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		return NULL;
 
 	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -378,7 +379,8 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	if (!rq)
 		return NULL;
 
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -503,8 +505,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	}
 
 	/*
-	 * For a zoned block device, if we only have writes queued and none of
-	 * them can be dispatched, rq will be NULL.
+	 * For a zoned block device that requires write serialization, if we
+	 * only have writes queued and none of them can be dispatched, rq will
+	 * be NULL.
 	 */
 	if (!rq)
 		return NULL;
@@ -893,7 +896,8 @@ static void dd_finish_request(struct request *rq)
 
 	atomic_inc(&per_prio->stats.completed);
 
-	if (blk_queue_is_zoned(q)) {
+	if (blk_queue_is_zoned(rq->q) &&
+	    !blk_queue_pipeline_zoned_writes(q)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
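
As an illustration of the retry bound claimed above (not part of this
patch), the following standalone userspace C program simulates the
worst case: NR_PENDING writes to one sequential write required zone
that reach the device in reverse LBA order. A write succeeds only if
its LBA matches the zone write pointer; failed writes are resubmitted.
NR_PENDING and all other names below are made up for the simulation.

/*
 * Hypothetical simulation of pipelined zoned writes; assumes a write
 * succeeds iff its LBA equals the zone write pointer.
 */
#include <assert.h>
#include <stdio.h>

#define NR_PENDING 8

int main(void)
{
	unsigned int lba[NR_PENDING], next[NR_PENDING];
	unsigned int retries[NR_PENDING] = { 0 };
	unsigned int wp = 0;	/* zone write pointer */
	unsigned int i, n = NR_PENDING, rounds = 0, max_retries = 0;

	/* Worst case: the device sees the writes in reverse LBA order. */
	for (i = 0; i < NR_PENDING; i++)
		lba[i] = NR_PENDING - 1 - i;

	while (n > 0) {
		unsigned int remaining = 0;

		for (i = 0; i < n; i++) {
			if (lba[i] == wp) {
				wp++;	/* aligned write: succeeds */
			} else {
				retries[lba[i]]++;
				next[remaining++] = lba[i];	/* resubmit */
			}
		}
		/* The write at the write pointer always succeeds. */
		assert(remaining < n);
		for (i = 0; i < remaining; i++)
			lba[i] = next[i];
		n = remaining;
		rounds++;
	}
	for (i = 0; i < NR_PENDING; i++)
		if (retries[i] > max_retries)
			max_retries = retries[i];
	printf("rounds: %u, max retries per request: %u (bound: %u)\n",
	       rounds, max_retries, NR_PENDING - 1);
	return 0;
}

Because the write pointer only advances when the write at the write
pointer completes, every resubmission round retires at least one
request, so no request is retried more than NR_PENDING - 1 times; the
program prints "max retries per request: 7 (bound: 7)".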