From: Yu Kuai <yukuai3@xxxxxxxxxx> Modeled on the implementation of blk_mq_tag_busy/idle(): - blk_mq_driver_tag_busy() is called the first time getting a driver tag fails; - blk_mq_driver_tag_idle() is called when driver tags are no longer exhausted; - A new counter 'busy_queues' is added to indicate how many shared queues/hctxs are busy (i.e. their driver tags are exhausted). Based on these new helpers, tag sharing will be delayed until getting a driver tag fails. Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> --- block/blk-mq-debugfs.c | 2 ++ block/blk-mq-tag.c | 53 +++++++++++++++++++++++++++++++++++++++++- block/blk-mq.c | 9 +++++-- block/blk-mq.h | 25 ++++++++++++++++---- include/linux/blk-mq.h | 7 ++++-- include/linux/blkdev.h | 1 + 6 files changed, 88 insertions(+), 9 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 1d460119f5b3..170bc2236e81 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -417,6 +417,8 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags); seq_printf(m, "active_queues=%d\n", READ_ONCE(tags->ctl.active_queues)); + seq_printf(m, "busy_queues=%d\n", + READ_ONCE(tags->ctl.busy_queues)); seq_puts(m, "\nbitmap_tags:\n"); sbitmap_queue_show(&tags->bitmap_tags, m); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 261769251282..cd13d8e512f7 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -165,6 +165,51 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) blk_mq_tag_wakeup_all(tags, false); } +void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx) +{ + unsigned int users; + struct blk_mq_tags *tags = hctx->tags; + + if (blk_mq_is_shared_tags(hctx->flags)) { + struct request_queue *q = hctx->queue; + + if (test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags) || + test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags)) + return; + } else { + if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state) || + test_and_set_bit(BLK_MQ_S_DTAG_BUSY, 
&hctx->state)) + return; + } + + spin_lock_irq(&tags->lock); + users = tags->ctl.busy_queues + 1; + WRITE_ONCE(tags->ctl.busy_queues, users); + spin_unlock_irq(&tags->lock); +} + +void __blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx) +{ + unsigned int users; + struct blk_mq_tags *tags = hctx->tags; + + if (blk_mq_is_shared_tags(hctx->flags)) { + struct request_queue *q = hctx->queue; + + if (!test_and_clear_bit(QUEUE_FLAG_HCTX_BUSY, + &q->queue_flags)) + return; + } else { + if (!test_and_clear_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state)) + return; + } + + spin_lock_irq(&tags->lock); + users = tags->ctl.busy_queues - 1; + WRITE_ONCE(tags->ctl.busy_queues, users); + spin_unlock_irq(&tags->lock); +} + static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt) { @@ -218,8 +263,11 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) if (tag != BLK_MQ_NO_TAG) goto found_tag; - if (data->flags & BLK_MQ_REQ_NOWAIT) + if (data->flags & BLK_MQ_REQ_NOWAIT) { + if (!(data->rq_flags & RQF_SCHED_TAGS)) + blk_mq_driver_tag_busy(data->hctx); return BLK_MQ_NO_TAG; + } ws = bt_wait_ptr(bt, data->hctx); do { @@ -246,6 +294,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) if (tag != BLK_MQ_NO_TAG) break; + if (!(data->rq_flags & RQF_SCHED_TAGS)) + blk_mq_driver_tag_busy(data->hctx); + bt_prev = bt; io_schedule(); diff --git a/block/blk-mq.c b/block/blk-mq.c index 8775616bc85c..a106533f063f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1668,8 +1668,10 @@ static void blk_mq_timeout_work(struct work_struct *work) */ queue_for_each_hw_ctx(q, hctx, i) { /* the hctx may be unmapped, so check it here */ - if (blk_mq_hw_queue_mapped(hctx)) + if (blk_mq_hw_queue_mapped(hctx)) { blk_mq_tag_idle(hctx); + blk_mq_driver_tag_idle(hctx); + } } } blk_queue_exit(q); @@ -3594,8 +3596,10 @@ static void blk_mq_exit_hctx(struct request_queue *q, { struct request *flush_rq = hctx->fq->flush_rq; - if (blk_mq_hw_queue_mapped(hctx)) + if 
(blk_mq_hw_queue_mapped(hctx)) { blk_mq_tag_idle(hctx); + blk_mq_driver_tag_idle(hctx); + } if (blk_queue_init_done(q)) blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], @@ -3931,6 +3935,7 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; } else { blk_mq_tag_idle(hctx); + blk_mq_driver_tag_idle(hctx); hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; } } diff --git a/block/blk-mq.h b/block/blk-mq.h index 5c0d19562848..3e555af1de49 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -195,8 +195,10 @@ static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt, return sbq_wait_ptr(bt, &hctx->wait_index); } -void __blk_mq_tag_busy(struct blk_mq_hw_ctx *); -void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); +void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx); +void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx); +void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx); +void __blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx); static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { @@ -210,6 +212,18 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) __blk_mq_tag_idle(hctx); } +static inline void blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx) +{ + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + __blk_mq_driver_tag_busy(hctx); +} + +static inline void blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx) +{ + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + __blk_mq_driver_tag_idle(hctx); +} + static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, unsigned int tag) { @@ -293,7 +307,8 @@ static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx, struct shared_tag_info *info = blk_mq_is_shared_tags(hctx->flags) ? 
&hctx->queue->shared_tag_info : &hctx->shared_tag_info; - atomic_sub(val, &info->active_tags); + if (!atomic_sub_return(val, &info->active_tags)) + blk_mq_driver_tag_idle(hctx); } static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx) @@ -354,8 +369,10 @@ bool __blk_mq_alloc_driver_tag(struct request *rq); static inline bool blk_mq_get_driver_tag(struct request *rq) { - if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) + if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) { + blk_mq_driver_tag_busy(rq->mq_hctx); return false; + } return true; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c93955f5f28f..9182ceca8c7a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -666,10 +666,11 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, - BLK_MQ_S_SCHED_RESTART = 2, + BLK_MQ_S_DTAG_BUSY = 2, + BLK_MQ_S_SCHED_RESTART = 3, /* hw queue is inactive after all its CPUs become offline */ - BLK_MQ_S_INACTIVE = 3, + BLK_MQ_S_INACTIVE = 4, BLK_MQ_MAX_DEPTH = 10240, @@ -728,6 +729,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, struct tag_sharing_ctl { unsigned int active_queues; + /* The number of shared queues/hctxs with exhausted driver tags. */ + unsigned int busy_queues; /* * If driver tags is shared for multiple queue/hctx, this is the head of * a list with request_queue/hctx->shared_tag_info.node entries. 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b364d65fe4e5..8fd6a0a92233 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -552,6 +552,7 @@ struct request_queue { #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ +#define QUEUE_FLAG_HCTX_BUSY 23 /* driver tag is exhausted for at least one blk-mq hctx */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ -- 2.39.2