Measurements have shown that limiting the queue depth to one for zoned
writes has a significant negative performance impact on zoned UFS
devices. Hence this patch, which disables zone locking in the
mq-deadline scheduler for storage controllers that support pipelining
zoned writes. This patch is based on the following assumptions:
- Applications submit write requests to sequential write required zones
  in order.
- The I/O priority of all pipelined write requests is the same per
  zone.
- Pipelined zoned write requests are submitted to a single hardware
  queue per zone.
- If such write requests get reordered by the software or hardware
  queue mechanism, nr_requests - 1 retries are sufficient to restore
  the order of the write requests.
- It happens infrequently that zoned write requests are reordered by
  the block layer.
- Either no I/O scheduler is used or an I/O scheduler is used that
  submits write requests per zone in LBA order.

Cc: Damien Le Moal <damien.lemoal@xxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-zoned.c   |  3 ++-
 block/mq-deadline.c | 18 ++++++++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 9da8cf1bb378..63c730a18ac4 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -513,7 +513,8 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 		break;
 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
-		if (!args->seq_zones_wlock) {
+		if (!blk_queue_pipeline_zoned_writes(q) &&
+		    !args->seq_zones_wlock) {
 			args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node,
 								args->nr_zones);
 			if (!args->seq_zones_wlock)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 1a9e835e816c..aaef07a55984 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -292,7 +292,8 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		return NULL;
 
 	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -326,7 +327,8 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	if (!rq)
 		return NULL;
 
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q) ||
+	    blk_queue_pipeline_zoned_writes(rq->q))
 		return rq;
 
 	/*
@@ -445,8 +447,9 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	}
 
 	/*
-	 * For a zoned block device, if we only have writes queued and none of
-	 * them can be dispatched, rq will be NULL.
+	 * For a zoned block device that requires write serialization, if we
+	 * only have writes queued and none of them can be dispatched, rq will
+	 * be NULL.
 	 */
 	if (!rq)
 		return NULL;
@@ -719,6 +722,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 	struct dd_per_prio *per_prio;
 	enum dd_prio prio;
+	bool pipelined_seq_write = blk_queue_pipeline_zoned_writes(q) &&
+		blk_rq_is_seq_zone_write(rq);
 	LIST_HEAD(free);
 
 	lockdep_assert_held(&dd->lock);
@@ -743,7 +748,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	trace_block_rq_insert(rq);
 
-	if (at_head) {
+	if (at_head && !pipelined_seq_write) {
 		list_add(&rq->queuelist, &per_prio->dispatch);
 		rq->fifo_time = jiffies;
 	} else {
@@ -823,7 +828,8 @@ static void dd_finish_request(struct request *rq)
 
 	atomic_inc(&per_prio->stats.completed);
 
-	if (blk_queue_is_zoned(rq->q) &&
+	if (blk_queue_is_zoned(rq->q) &&
+	    !blk_queue_pipeline_zoned_writes(q)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->zone_lock, flags);
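
Not part of the patch: below is a minimal sketch of how a block driver
whose controller preserves the order of pipelined writes might opt in
to this behavior. It assumes the rest of this series exposes the
blk_queue_pipeline_zoned_writes() helper used above via a queue flag;
the flag name QUEUE_FLAG_PIPELINE_ZONED_WRITES and the function name
below are hypothetical, while blk_queue_flag_set() is the existing
block layer interface for setting queue flags.

#include <linux/blkdev.h>

/*
 * Hypothetical driver-side opt-in: advertise that zoned writes may be
 * pipelined instead of being serialized by zone write locking.
 * QUEUE_FLAG_PIPELINE_ZONED_WRITES is an assumed flag name provided
 * elsewhere in this series.
 */
static void example_enable_zoned_write_pipelining(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_PIPELINE_ZONED_WRITES, q);
}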