Re: [PATCH RFC v7 05/12] blk-mq: Record nr_active_requests per queue for when using shared sbitmap

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jun 11, 2020 at 01:29:12AM +0800, John Garry wrote:
> The per-hctx nr_active value can no longer be used to fairly assign a share
> of tag depth per request queue for when using a shared sbitmap, as it does
> not consider that the tags are shared tags over all hctx's.
> 
> For this case, record the nr_active_requests per request_queue, and make
> the judgment based on that value.
> 
> Also introduce a debugfs version of per-hctx blk_mq_debugfs_attr, omitting
> hctx_active_show() (as blk_mq_hw_ctx.nr_active is no longer maintained for
> the case of shared sbitmap) and other entries which we can add which would
> be revised specifically for when using a shared sbitmap.
> 
> Co-developed-with: Kashyap Desai <kashyap.desai@xxxxxxxxxxxx>
> Signed-off-by: John Garry <john.garry@xxxxxxxxxx>
> ---
>  block/blk-core.c       |  2 ++
>  block/blk-mq-debugfs.c | 23 ++++++++++++++++++++++-
>  block/blk-mq-tag.c     | 10 ++++++----
>  block/blk-mq.c         |  6 +++---
>  block/blk-mq.h         | 28 +++++++++++++++++++++++++++-
>  include/linux/blkdev.h |  2 ++
>  6 files changed, 62 insertions(+), 9 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 03252af8c82c..c622453c1363 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -529,6 +529,8 @@ struct request_queue *__blk_alloc_queue(int node_id)
>  	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
>  	q->node = node_id;
>  
> +	atomic_set(&q->nr_active_requests_shared_sbitmap, 0);
> +
>  	timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
>  		    laptop_mode_timer_fn, 0);
>  	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index a400b6698dff..0fa3af41ab65 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -796,6 +796,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
>  	{},
>  };
>  
> +static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_shared_sbitmap_attrs[] = {
> +	{"state", 0400, hctx_state_show},
> +	{"flags", 0400, hctx_flags_show},
> +	{"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
> +	{"busy", 0400, hctx_busy_show},
> +	{"ctx_map", 0400, hctx_ctx_map_show},
> +	{"sched_tags", 0400, hctx_sched_tags_show},
> +	{"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},
> +	{"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},
> +	{"dispatched", 0600, hctx_dispatched_show, hctx_dispatched_write},
> +	{"queued", 0600, hctx_queued_show, hctx_queued_write},
> +	{"run", 0600, hctx_run_show, hctx_run_write},
> +	{"active", 0400, hctx_active_show},
> +	{"dispatch_busy", 0400, hctx_dispatch_busy_show},
> +	{}
> +};

You could use a macro or something similar to avoid this duplication.

> +
>  static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
>  	{"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops},
>  	{"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops},
> @@ -878,13 +895,17 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
>  				  struct blk_mq_hw_ctx *hctx)
>  {
>  	struct blk_mq_ctx *ctx;
> +	struct blk_mq_tag_set *set = q->tag_set;
>  	char name[20];
>  	int i;
>  
>  	snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
>  	hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir);
>  
> -	debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs);
> +	if (blk_mq_is_sbitmap_shared(set))
> +		debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_shared_sbitmap_attrs);
> +	else
> +		debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs);
>  
>  	hctx_for_each_ctx(hctx, ctx, i)
>  		blk_mq_debugfs_register_ctx(hctx, ctx);
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 92843e3e1a2a..7db16e49f6f6 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -60,9 +60,11 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>   * For shared tag users, we track the number of currently active users
>   * and attempt to provide a fair share of the tag depth for each of them.
>   */
> -static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
> +static inline bool hctx_may_queue(struct blk_mq_alloc_data *data,
>  				  struct sbitmap_queue *bt)
>  {
> +	struct blk_mq_hw_ctx *hctx = data->hctx;
> +	struct request_queue *q = data->q;
>  	unsigned int depth, users;
>  
>  	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
> @@ -84,15 +86,15 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
>  	 * Allow at least some tags
>  	 */
>  	depth = max((bt->sb.depth + users - 1) / users, 4U);
> -	return atomic_read(&hctx->nr_active) < depth;
> +	return __blk_mq_active_requests(hctx, q) < depth;

There is a big change to 'users' too:

	users = atomic_read(&hctx->tags->active_queues);

Originally there was a single hctx->tags for these HBAs; now there are many
hctx->tags, so 'users' may become much smaller than before.

Maybe '->active_queues' can be moved to the tag_set for the
blk_mq_is_sbitmap_shared() case.

>  }
>  
>  static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
>  			    struct sbitmap_queue *bt)
>  {
>  	if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
> -	    !hctx_may_queue(data->hctx, bt))
> -		return BLK_MQ_NO_TAG;
> +	    !hctx_may_queue(data, bt))
> +		return -1;

BLK_MQ_NO_TAG should still be returned here, rather than a bare -1.

>  	if (data->shallow_depth)
>  		return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
>  	else
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 77120dd4e4d5..0f7e062a1665 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -283,7 +283,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>  	} else {
>  		if (data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) {
>  			rq_flags = RQF_MQ_INFLIGHT;
> -			atomic_inc(&data->hctx->nr_active);
> +			__blk_mq_inc_active_requests(data->hctx, data->q);
>  		}
>  		rq->tag = tag;
>  		rq->internal_tag = BLK_MQ_NO_TAG;
> @@ -527,7 +527,7 @@ void blk_mq_free_request(struct request *rq)
>  
>  	ctx->rq_completed[rq_is_sync(rq)]++;
>  	if (rq->rq_flags & RQF_MQ_INFLIGHT)
> -		atomic_dec(&hctx->nr_active);
> +		__blk_mq_dec_active_requests(hctx, q);
>  
>  	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
>  		laptop_io_completion(q->backing_dev_info);
> @@ -1073,7 +1073,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
>  	if (rq->tag >= 0) {
>  		if (shared) {
>  			rq->rq_flags |= RQF_MQ_INFLIGHT;
> -			atomic_inc(&data.hctx->nr_active);
> +			__blk_mq_inc_active_requests(rq->mq_hctx, rq->q);
>  		}
>  		data.hctx->tags->rqs[rq->tag] = rq;
>  	}
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 1a283c707215..9c1e612c2298 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -202,6 +202,32 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
>  	return true;
>  }
>  
> +static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx,
> +						struct request_queue *q)
> +{
> +	if (blk_mq_is_sbitmap_shared(q->tag_set))
> +		atomic_inc(&q->nr_active_requests_shared_sbitmap);
> +	else
> +		atomic_inc(&hctx->nr_active);
> +}
> +
> +static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx,
> +						struct request_queue *q)
> +{
> +	if (blk_mq_is_sbitmap_shared(q->tag_set))
> +		atomic_dec(&q->nr_active_requests_shared_sbitmap);
> +	else
> +		atomic_dec(&hctx->nr_active);
> +}
> +
> +static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx,
> +					   struct request_queue *q)
> +{
> +	if (blk_mq_is_sbitmap_shared(q->tag_set))

I'd suggest adding an hctx version of blk_mq_is_sbitmap_shared(), since
q->tag_set is seldom used in the fast path, and checking hctx->flags is
more efficient than checking tag_set->flags.


Thanks, 
Ming




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux