The fair tag sharing algorithm has a negative performance impact on
storage devices that require the full queue depth to reach peak
performance, e.g. UFS devices. This is because, after a request queue
becomes inactive, it takes a long time before tags are reassigned to
the active request queue(s). Since fair tag sharing is not needed if
the request processing latency is similar for all request queues,
introduce a function for enabling or disabling fair tag sharing.
Increase BLK_MQ_F_ALLOC_POLICY_START_BIT to prevent the fair tag
sharing flag from overlapping with the tag allocation policy bits.

Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Martin K. Petersen <martin.petersen@xxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Keith Busch <kbusch@xxxxxxxxxx>
Cc: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>
Cc: Yu Kuai <yukuai1@xxxxxxxxxxxxxxx>
Cc: Ed Tsai <ed.tsai@xxxxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-mq-debugfs.c |  1 +
 block/blk-mq.c         | 28 ++++++++++++++++++++++++++++
 block/blk-mq.h         |  3 ++-
 include/linux/blk-mq.h |  6 ++++--
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5cbeb9344f2f..f41408103106 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -198,6 +198,7 @@ static const char *const hctx_flag_name[] = {
 	HCTX_FLAG_NAME(NO_SCHED),
 	HCTX_FLAG_NAME(STACKING),
 	HCTX_FLAG_NAME(TAG_HCTX_SHARED),
+	HCTX_FLAG_NAME(DISABLE_FAIR_TAG_SHARING),
 };
 
 #undef HCTX_FLAG_NAME
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b8093155df8d..206295606cec 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4569,6 +4569,34 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 
+/*
+ * Enable or disable fair tag sharing for all request queues associated with
+ * a tag set.
+ */
+void blk_mq_update_fair_sharing(struct blk_mq_tag_set *set, bool enable)
+{
+	const unsigned int DFTS_BIT = ilog2(BLK_MQ_F_DISABLE_FAIR_TAG_SHARING);
+	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q;
+	unsigned long i;
+
+	/*
+	 * Serialize against blk_mq_update_nr_hw_queues() and
+	 * blk_mq_realloc_hw_ctxs().
+	 */
+	mutex_lock(&set->tag_list_lock);
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_freeze_queue(q);
+	assign_bit(DFTS_BIT, &set->flags, !enable);
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		queue_for_each_hw_ctx(q, hctx, i)
+			assign_bit(DFTS_BIT, &hctx->flags, !enable);
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_unfreeze_queue(q);
+	mutex_unlock(&set->tag_list_lock);
+}
+EXPORT_SYMBOL(blk_mq_update_fair_sharing);
+
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f75a9ecfebde..eda6bd0611ea 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -416,7 +416,8 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 {
 	unsigned int depth, users;
 
-	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
+	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
+	    (hctx->flags & BLK_MQ_F_DISABLE_FAIR_TAG_SHARING))
 		return true;
 
 	/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1ab3081c82ed..ddda190b5c24 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -503,7 +503,7 @@ struct blk_mq_tag_set {
 	unsigned int		cmd_size;
 	int			numa_node;
 	unsigned int		timeout;
-	unsigned int		flags;
+	unsigned long		flags;
 	void			*driver_data;
 
 	struct blk_mq_tags	**tags;
@@ -662,7 +662,8 @@ enum {
 	 * or shared hwqs instead of 'mq-deadline'.
 	 */
 	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 7,
-	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_DISABLE_FAIR_TAG_SHARING = 1 << 8,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 16,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 
 	BLK_MQ_S_STOPPED	= 0,
@@ -705,6 +706,7 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
 		const struct blk_mq_ops *ops, unsigned int queue_depth,
 		unsigned int set_flags);
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
+void blk_mq_update_fair_sharing(struct blk_mq_tag_set *set, bool enable);
 
 void blk_mq_free_request(struct request *rq);
 int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
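
For reviewers, a minimal usage sketch of the new interface, not part of
this patch: a host driver whose request queues have similar request
processing latencies could disable fair tag sharing after tag set
setup. "struct example_host" and example_disable_fair_sharing() are
hypothetical names chosen for illustration.

#include <linux/blk-mq.h>

/* Hypothetical host driver state; only the tag set matters here. */
struct example_host {
	struct blk_mq_tag_set tag_set;
};

static void example_disable_fair_sharing(struct example_host *host)
{
	/*
	 * After this call, hctx_may_queue() no longer divides the tags
	 * among the active request queues sharing host->tag_set, so a
	 * single active queue may consume the full queue depth.
	 */
	blk_mq_update_fair_sharing(&host->tag_set, false);
}

Note that blk_mq_update_fair_sharing() freezes all request queues in
the tag set before flipping the flag, so no requests are in flight
while set->flags and hctx->flags are updated.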