When multiple hctx share one tagset. The wake_batch is calculated during initialization by queue_depth. But when multiple hctx share one tagset. The queue depth assigned to each user may be smaller than wakup_batch. This may cause the waiting queue to fail to wakup and leads to Hang. Fix this by recalculating wake_batch when inc or dec active_queues. Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps") Signed-off-by: Laibin Qiu <qiulaibin@xxxxxxxxxx> --- block/blk-mq-tag.c | 44 +++++++++++++++++++++++++++++++++++++++-- include/linux/sbitmap.h | 8 ++++++++ lib/sbitmap.c | 3 ++- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 86f87346232a..d02f5ac0004c 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -16,6 +16,27 @@ #include "blk-mq-sched.h" #include "blk-mq-tag.h" +static void bt_update_wake_batch(struct sbitmap_queue *bt, unsigned int users) +{ + unsigned int depth; + + depth = max((bt->sb.depth + users - 1) / users, 4U); + sbitmap_queue_update_wake_batch(bt, depth); +} + +/* + * Recalculate wakeup batch when tag is shared by hctx. + */ +static void blk_mq_update_wake_batch(struct sbitmap_queue *bitmap_tags, + struct sbitmap_queue *breserved_tags, unsigned int users) +{ + if (!users) + return; + + bt_update_wake_batch(bitmap_tags, users); + bt_update_wake_batch(breserved_tags, users); +} + /* * If a previously inactive queue goes active, bump the active user count. * We need to do this before try to allocate driver tag, then even if fail @@ -24,17 +45,29 @@ */ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { + unsigned int users; + if (blk_mq_is_sbitmap_shared(hctx->flags)) { struct request_queue *q = hctx->queue; struct blk_mq_tag_set *set = q->tag_set; if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) && - !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) + !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) { atomic_inc(&set->active_queues_shared_sbitmap); + + users = atomic_read(&set->active_queues_shared_sbitmap); + blk_mq_update_wake_batch(&set->__bitmap_tags, + &set->__breserved_tags, users); + } } else { if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && - !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) { atomic_inc(&hctx->tags->active_queues); + + users = atomic_read(&hctx->tags->active_queues); + blk_mq_update_wake_batch(&hctx->tags->__bitmap_tags, + &hctx->tags->__breserved_tags, users); + } } return true; @@ -59,16 +92,23 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) struct blk_mq_tags *tags = hctx->tags; struct request_queue *q = hctx->queue; struct blk_mq_tag_set *set = q->tag_set; + unsigned int users; if (blk_mq_is_sbitmap_shared(hctx->flags)) { if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return; atomic_dec(&set->active_queues_shared_sbitmap); + users = atomic_read(&set->active_queues_shared_sbitmap); + blk_mq_update_wake_batch(&set->__bitmap_tags, + &set->__breserved_tags, users); } else { if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return; atomic_dec(&tags->active_queues); + users = atomic_read(&tags->active_queues); + blk_mq_update_wake_batch(&tags->__bitmap_tags, + &tags->__breserved_tags, users); } blk_mq_tag_wakeup_all(tags, false); diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 2713e689ad66..d49e4f054bfe 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -406,6 +406,14 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) sbitmap_free(&sbq->sb); } +/** + * sbitmap_queue_update_wake_batch() - Recalucate wake batch. + * @sbq: Bitmap queue. + * @depth: New number of queue depth. + */ +void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, + unsigned int depth); + /** * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. * @sbq: Bitmap queue to resize. diff --git a/lib/sbitmap.c b/lib/sbitmap.c index b25db9be938a..bbe1d663763f 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -457,7 +457,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, +void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); @@ -475,6 +475,7 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, atomic_set(&sbq->ws[i].wait_cnt, 1); } } +EXPORT_SYMBOL_GPL(sbitmap_queue_update_wake_batch); void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { -- 2.22.0