Remove the code for fair tag sharing because it significantly hurts
performance for UFS devices. Removing this code is safe because the
legacy block layer worked fine without any equivalent fairness
algorithm.

This algorithm hurts performance for UFS devices because UFS devices
have multiple logical units. One of these logical units (WLUN) is used
to submit control commands, e.g. START STOP UNIT. If any request is
submitted to the WLUN, the queue depth is reduced from 31 to 15 or
lower for data LUNs.

See also https://lore.kernel.org/linux-scsi/20221229030645.11558-1-ed.tsai@xxxxxxxxxxxx/

Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Martin K. Petersen <martin.petersen@xxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Keith Busch <kbusch@xxxxxxxxxx>
Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
Cc: Ed Tsai <ed.tsai@xxxxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-mq-debugfs.c |  2 --
 block/blk-mq-tag.c     |  8 --------
 block/blk-mq.c         |  3 ---
 block/blk-mq.h         | 40 ----------------------------------------
 4 files changed, 53 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index bd942341b638..00af8d2c8816 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -426,8 +426,6 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
 {
 	seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
 	seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
-	seq_printf(m, "active_queues=%d\n",
-		   atomic_read(&tags->active_queues));
 
 	seq_puts(m, "\nbitmap_tags:\n");
 	sbitmap_queue_show(&tags->bitmap_tags, m);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 9eb968e14d31..e9243cae41c7 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -53,8 +53,6 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 		set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
 	}
 
-	users = atomic_inc_return(&hctx->tags->active_queues);
-
 	blk_mq_update_wake_batch(hctx->tags, users);
 }
 
@@ -88,8 +86,6 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 			return;
 	}
 
-	users = atomic_dec_return(&tags->active_queues);
-
 	blk_mq_update_wake_batch(tags, users);
 	blk_mq_tag_wakeup_all(tags, false);
 }
@@ -98,10 +94,6 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
 			    struct sbitmap_queue *bt)
 {
-	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
-	    !hctx_may_queue(data->hctx, bt))
-		return BLK_MQ_NO_TAG;
-
 	if (data->shallow_depth)
 		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
 	else
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4a7805390cf8..c570f134850a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1698,9 +1698,6 @@ static bool __blk_mq_alloc_driver_tag(struct request *rq)
 	if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
 		bt = &rq->mq_hctx->tags->breserved_tags;
 		tag_offset = 0;
-	} else {
-		if (!hctx_may_queue(rq->mq_hctx, bt))
-			return false;
 	}
 
 	tag = __sbitmap_queue_get(bt);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 0b2870839cdd..908f830931f0 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -334,46 +334,6 @@ static inline void blk_mq_free_requests(struct list_head *list)
 	}
 }
 
-/*
- * For shared tag users, we track the number of currently active users
- * and attempt to provide a fair share of the tag depth for each of them.
- */
-static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
-				  struct sbitmap_queue *bt)
-{
-	unsigned int depth, users;
-
-	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
-		return true;
-
-	/*
-	 * Don't try dividing an ant
-	 */
-	if (bt->sb.depth == 1)
-		return true;
-
-	if (blk_mq_is_shared_tags(hctx->flags)) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-			return true;
-	} else {
-		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-			return true;
-	}
-
-	users = atomic_read(&hctx->tags->active_queues);
-
-	if (!users)
-		return true;
-
-	/*
-	 * Allow at least some tags
-	 */
-	depth = max((bt->sb.depth + users - 1) / users, 4U);
-	return __blk_mq_active_requests(hctx) < depth;
-}
-
 /* run the code block in @dispatch_ops with rcu/srcu read lock held */
 #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
 do {								\
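
For context on the queue-depth drop described in the commit message: the removed
hctx_may_queue() caps each active queue at ceil(depth / active_queues), with a floor
of four tags. Below is a small stand-alone sketch of that arithmetic only. It is
illustrative: fair_share_depth() is a made-up helper name, the 31-tag depth is taken
from the commit message, and the real limit also depends on reserved tags and on
which queues are currently marked active.

#include <stdio.h>

/*
 * Same arithmetic as the removed hctx_may_queue():
 * depth = max(ceil(bt_depth / users), 4).
 * fair_share_depth() is an illustrative name, not a kernel function.
 */
static unsigned int fair_share_depth(unsigned int bt_depth, unsigned int users)
{
	unsigned int depth = (bt_depth + users - 1) / users;

	return depth < 4 ? 4 : depth;
}

int main(void)
{
	unsigned int users;

	/* 31 usable tags, as in the UFS example from the commit message. */
	for (users = 1; users <= 4; users++)
		printf("active queues %u -> per-queue depth limit %u\n",
		       users, fair_share_depth(31, users));

	return 0;
}

With a single active logical unit the full depth is available; once a request is sent
to the WLUN a second queue counts as active and the limit for the data LUNs is roughly
halved, which is the queue-depth reduction the commit message refers to.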