Re: [PATCH -next V5] blk-mq: fix tag_get wait task can't be awakened

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 13, 2022 at 10:55:36AM +0800, Laibin Qiu wrote:
> In case of shared tags, there might be more than one hctx which
> allocates from the same tags, and each hctx is limited to allocate at
> most:
>         hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);
> 
> tag idle detection is lazy, and may be delayed for 30sec, so there
> could be just one real active hctx(queue) but all others are actually
> idle and still accounted as active because of the lazy idle detection.
> Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
> forever on this real active hctx.
> 
> Fix this by recalculating wake_batch when inc or dec active_queues.

FWIW,
Reviewed-by: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx>

Thanks!

> Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
> Suggested-by: Ming Lei <ming.lei@xxxxxxxxxx>
> Suggested-by: John Garry <john.garry@xxxxxxxxxx>
> Signed-off-by: Laibin Qiu <qiulaibin@xxxxxxxxxx>
> ---
>  block/blk-mq-tag.c      | 40 +++++++++++++++++++++++++++++++++-------
>  include/linux/sbitmap.h | 11 +++++++++++
>  lib/sbitmap.c           | 25 ++++++++++++++++++++++---
>  3 files changed, 66 insertions(+), 10 deletions(-)
> 
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index e55a6834c9a6..845f74e8dd7b 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -16,6 +16,21 @@
>  #include "blk-mq-sched.h"
>  #include "blk-mq-tag.h"
>  
> +/*
> + * Recalculate wakeup batch when tag is shared by hctx.
> + */
> +static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
> +		unsigned int users)
> +{
> +	if (!users)
> +		return;
> +
> +	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
> +			users);
> +	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
> +			users);
> +}
> +
>  /*
>   * If a previously inactive queue goes active, bump the active user count.
>   * We need to do this before try to allocate driver tag, then even if fail
> @@ -24,18 +39,26 @@
>   */
>  bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>  {
> +	unsigned int users;
> +
>  	if (blk_mq_is_shared_tags(hctx->flags)) {
>  		struct request_queue *q = hctx->queue;
>  
> -		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
> -		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
> -			atomic_inc(&hctx->tags->active_queues);
> +		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
> +		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
> +			return true;
> +		}
>  	} else {
> -		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
> -		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> -			atomic_inc(&hctx->tags->active_queues);
> +		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
> +		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
> +			return true;
> +		}
>  	}
>  
> +	users = atomic_inc_return(&hctx->tags->active_queues);
> +
> +	blk_mq_update_wake_batch(hctx->tags, users);
> +
>  	return true;
>  }
>  
> @@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
>  void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>  {
>  	struct blk_mq_tags *tags = hctx->tags;
> +	unsigned int users;
>  
>  	if (blk_mq_is_shared_tags(hctx->flags)) {
>  		struct request_queue *q = hctx->queue;
> @@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>  			return;
>  	}
>  
> -	atomic_dec(&tags->active_queues);
> +	users = atomic_dec_return(&tags->active_queues);
> +
> +	blk_mq_update_wake_batch(tags, users);
>  
>  	blk_mq_tag_wakeup_all(tags, false);
>  }
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index fc0357a6e19b..95df357ec009 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
>  	sbitmap_free(&sbq->sb);
>  }
>  
> +/**
> + * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
> + * @sbq: Bitmap queue to recalculate wake batch.
> + * @users: Number of shares.
> + *
> + * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
> + * by depth. This interface is for HCTX shared tags or queue shared tags.
> + */
> +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int users);
> +
>  /**
>   * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
>   * @sbq: Bitmap queue to resize.
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 2709ab825499..6220fa67fb7e 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
>  }
>  EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
>  
> -static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> -					    unsigned int depth)
> +static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int wake_batch)
>  {
> -	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
>  	int i;
>  
>  	if (sbq->wake_batch != wake_batch) {
> @@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
>  	}
>  }
>  
> +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int depth)
> +{
> +	unsigned int wake_batch;
> +
> +	wake_batch = sbq_calc_wake_batch(sbq, depth);
> +	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
> +}
> +
> +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int users)
> +{
> +	unsigned int wake_batch;
> +
> +	wake_batch = clamp_val((sbq->sb.depth + users - 1) /
> +			users, 4, SBQ_WAKE_BATCH);
> +	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
> +}
> +EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
> +
>  void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
>  {
>  	sbitmap_queue_update_wake_batch(sbq, depth);
> -- 
> 2.22.0
> 

-- 
With Best Regards,
Andy Shevchenko





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux