Re: [PATCH] blk-mq: Iterate also over sched_tags requests at blk_mq_tagset_iter()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Oct 22, 2017 at 01:47:54PM +0000, Israel Rukshin wrote:
> Currently, blk_mq_tagset_iter() iterate over initial hctx tags only.
> In case scheduler is used, it doesn't iterate the hctx scheduler tags
> and the static request aren't been updated.
> For example, while using NVMe over Fabrics RDMA host, this cause us not to
> reinit the scheduler requests and thus not re-register all the memory regions
> during the tagset re-initialization in the reconnect flow.
> 
> This may lead to a memory registration error:
> "MEMREG for CQE 0xffff88044c14dce8 failed with status memory
> management operation error (6)"
> 
> Signed-off-by: Israel Rukshin <israelr@xxxxxxxxxxxx>
> Reviewed-by: Max Gurtovoy <maxg@xxxxxxxxxxxx>
> ---
> 
> The commit is based on nvme branch for 4.15 which includes
> Sagi's patches for reinit_tagset.
> 
> ---
>  block/blk-mq-sched.c   |  3 +++
>  block/blk-mq-tag.c     | 16 ++++++++++++++++
>  block/blk-mq.c         | 14 +++++++++++++-
>  include/linux/blk-mq.h |  1 +
>  4 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 4ab6943..4db9797 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -426,6 +426,7 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
>  		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
>  		blk_mq_free_rq_map(hctx->sched_tags);
>  		hctx->sched_tags = NULL;
> +		set->sched_tags[hctx_idx] = NULL;
>  	}
>  }
>  
> @@ -441,6 +442,8 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
>  	if (!hctx->sched_tags)
>  		return -ENOMEM;
>  
> +	set->sched_tags[hctx_idx] = hctx->sched_tags;
> +
>  	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
>  	if (ret)
>  		blk_mq_sched_free_tags(set, hctx, hctx_idx);
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index c81b40e..c290de0 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -322,6 +322,22 @@ int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
>  		}
>  	}
>  
> +	for (i = 0; i < set->nr_hw_queues; i++) {
> +		struct blk_mq_tags *sched_tags = set->sched_tags[i];
> +
> +		if (!sched_tags)
> +			continue;
> +
> +		for (j = 0; j < sched_tags->nr_tags; j++) {
> +			if (!sched_tags->static_rqs[j])
> +				continue;
> +
> +			ret = fn(data, sched_tags->static_rqs[j]);
> +			if (ret)
> +				goto out;
> +		}
> +	}
> +
>  out:
>  	return ret;
>  }
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 7f01d69..d7675b7 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2576,10 +2576,16 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
>  		return -ENOMEM;
>  
>  	ret = -ENOMEM;
> +
> +	set->sched_tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
> +				       GFP_KERNEL, set->numa_node);
> +	if (!set->sched_tags)
> +		goto out_free_tags;
> +
>  	set->mq_map = kzalloc_node(sizeof(*set->mq_map) * nr_cpu_ids,
>  			GFP_KERNEL, set->numa_node);
>  	if (!set->mq_map)
> -		goto out_free_tags;
> +		goto out_free_sched_tags;
>  
>  	ret = blk_mq_update_queue_map(set);
>  	if (ret)
> @@ -2597,6 +2603,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
>  out_free_mq_map:
>  	kfree(set->mq_map);
>  	set->mq_map = NULL;
> +out_free_sched_tags:
> +	kfree(set->sched_tags);
> +	set->sched_tags = NULL;
>  out_free_tags:
>  	kfree(set->tags);
>  	set->tags = NULL;
> @@ -2614,6 +2623,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
>  	kfree(set->mq_map);
>  	set->mq_map = NULL;
>  
> +	kfree(set->sched_tags);
> +	set->sched_tags = NULL;
> +
>  	kfree(set->tags);
>  	set->tags = NULL;
>  }
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index cfd64e5..9ec629f 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -78,6 +78,7 @@ struct blk_mq_tag_set {
>  	void			*driver_data;
>  
>  	struct blk_mq_tags	**tags;
> +	struct blk_mq_tags	**sched_tags;

It isn't a good idea to put 'sched_tags' here because
scheduler tags shouldn't be shared, not like driver tags.

If you want to do something on scheduler tags, you have to
get the 'hctx' instance first.

-- 
Ming



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux