[PATCH 3/3] block: Add support for sharing tags across hardware queues

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a boolean member 'share_tags' in struct blk_mq_tag_set. If that member
variable is set, make all hctx->tags[] pointers identical. Implement the
necessary changes in the functions that allocate, free and resize tag sets.
Modify blk_mq_tagset_busy_iter() such that it continues to call the
callback function once per request. Modify blk_mq_queue_tag_busy_iter()
such that the callback function is only called with the correct hctx
as first argument. Modify the debugfs code such that it keeps showing only
matching tags per hctx.

This patch has been tested by running blktests on top of a kernel that
includes the following change to enable shared tags for all block drivers
except the NVMe drivers:

diff --git a/block/blk-mq.c b/block/blk-mq.c
@@ -3037,6 +3037,10 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)

        BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);

+       /* Test code: enable tag sharing for all block drivers except NVMe */
+       if (!set->ops->poll)
+               set->share_tags = true;
+
        if (!set->nr_hw_queues)
                return -EINVAL;
        if (!set->queue_depth)

Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: John Garry <john.garry@xxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-mq-debugfs.c | 40 ++++++++++++++++++++++++++++++++++++++--
 block/blk-mq-tag.c     |  7 +++++--
 block/blk-mq.c         | 28 +++++++++++++++++++++-------
 include/linux/blk-mq.h |  8 ++++++--
 4 files changed, 70 insertions(+), 13 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3678e95ec947..653e80ede3bd 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -472,20 +472,56 @@ static int hctx_tags_show(void *data, struct seq_file *m)
 	return res;
 }
 
+struct hctx_sb_data {
+	struct sbitmap		*sb;	/* output bitmap */
+	struct blk_mq_hw_ctx	*hctx;	/* input hctx */
+};
+
+static bool hctx_filter_fn(struct blk_mq_hw_ctx *hctx, struct request *req,
+			   void *priv, bool reserved)
+{
+	struct hctx_sb_data *hctx_sb_data = priv;
+
+	if (hctx == hctx_sb_data->hctx)
+		sbitmap_set_bit(hctx_sb_data->sb, req->tag);
+	return true;
+}
+
+static void hctx_filter_sb(struct sbitmap *sb, struct blk_mq_hw_ctx *hctx)
+{
+	struct hctx_sb_data hctx_sb_data = { .sb = sb, .hctx = hctx };
+
+	blk_mq_queue_tag_busy_iter(hctx->queue, hctx_filter_fn, &hctx_sb_data);
+}
+
 static int hctx_tags_bitmap_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_hw_ctx *hctx = data;
 	struct request_queue *q = hctx->queue;
+	struct sbitmap sb, *hctx_sb;
 	int res;
 
+	if (!hctx->tags)
+		return 0;
+	hctx_sb = &hctx->tags->bitmap_tags.sb;
+	res = sbitmap_init_node(&sb, hctx_sb->depth, hctx_sb->shift, GFP_KERNEL,
+				NUMA_NO_NODE);
+	if (res)
+		return res;
+
 	res = mutex_lock_interruptible(&q->sysfs_lock);
 	if (res)
 		goto out;
-	if (hctx->tags)
-		sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m);
+	/*
+	 * If tags are shared across hardware queues, hctx_sb contains tags
+	 * for multiple hardware queues. Filter the tags for 'hctx' into 'sb'.
+	 */
+	hctx_filter_sb(&sb, hctx);
 	mutex_unlock(&q->sysfs_lock);
+	sbitmap_bitmap_show(&sb, m);
 
 out:
+	sbitmap_free(&sb);
 	return res;
 }
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a60e1b4a8158..770fe2324230 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -220,7 +220,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	 * We can hit rq == NULL here, because the tagging functions
 	 * test and set the bit before assigning ->rqs[].
 	 */
-	if (rq && rq->q == hctx->queue)
+	if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx)
 		return iter_data->fn(hctx, rq, iter_data->data, reserved);
 	return true;
 }
@@ -341,8 +341,11 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 	int i;
 
 	for (i = 0; i < tagset->nr_hw_queues; i++) {
-		if (tagset->tags && tagset->tags[i])
+		if (tagset->tags && tagset->tags[i]) {
 			blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
+			if (tagset->share_tags)
+				break;
+		}
 	}
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index fec4b82ff91c..fa4cfc4b7e7c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2404,6 +2404,12 @@ static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
 {
 	int ret = 0;
 
+	if (hctx_idx > 0 && set->share_tags) {
+		WARN_ON_ONCE(!set->tags[0]);
+		set->tags[hctx_idx] = set->tags[0];
+		return 0;
+	}
+
 	set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
 					set->queue_depth, set->reserved_tags);
 	if (!set->tags[hctx_idx])
@@ -2423,8 +2429,10 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 					 unsigned int hctx_idx)
 {
 	if (set->tags && set->tags[hctx_idx]) {
-		blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
-		blk_mq_free_rq_map(set->tags[hctx_idx]);
+		if (hctx_idx == 0 || !set->share_tags) {
+			blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
+			blk_mq_free_rq_map(set->tags[hctx_idx]);
+		}
 		set->tags[hctx_idx] = NULL;
 	}
 }
@@ -2568,7 +2576,7 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 
 	mutex_lock(&set->tag_list_lock);
 	list_del_rcu(&q->tag_set_list);
-	if (list_is_singular(&set->tag_list)) {
+	if (list_is_singular(&set->tag_list) && !set->share_tags) {
 		/* just transitioned to unshared */
 		set->flags &= ~BLK_MQ_F_TAG_SHARED;
 		/* update existing queue */
@@ -2586,7 +2594,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 	/*
 	 * Check to see if we're transitioning to shared (from 1 to 2 queues).
 	 */
-	if (!list_empty(&set->tag_list) &&
+	if ((!list_empty(&set->tag_list) || set->share_tags) &&
 	    !(set->flags & BLK_MQ_F_TAG_SHARED)) {
 		set->flags |= BLK_MQ_F_TAG_SHARED;
 		/* update existing queue */
@@ -2911,15 +2919,21 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
 
-	for (i = 0; i < set->nr_hw_queues; i++)
-		if (!__blk_mq_alloc_rq_map(set, i))
+	for (i = 0; i < set->nr_hw_queues; i++) {
+		if (i > 0 && set->share_tags) {
+			set->tags[i] = set->tags[0];
+		} else if (!__blk_mq_alloc_rq_map(set, i))
 			goto out_unwind;
+	}
 
 	return 0;
 
 out_unwind:
-	while (--i >= 0)
+	while (--i >= 0) {
+		if (i > 0 && set->share_tags)
+			continue;
 		blk_mq_free_rq_map(set->tags[i]);
+	}
 
 	return -ENOMEM;
 }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 522631d108af..dd5517476314 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -224,10 +224,13 @@ enum hctx_type {
  * @numa_node:	   NUMA node the storage adapter has been connected to.
  * @timeout:	   Request processing timeout in jiffies.
  * @flags:	   Zero or more BLK_MQ_F_* flags.
+ * @share_tags:	   Whether or not to share one tag set across hardware queues.
  * @driver_data:   Pointer to data owned by the block driver that created this
  *		   tag set.
- * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
- *		   elements.
+ * @tags:	   Array of tag set pointers. Has @nr_hw_queues elements. If
+ *		   share_tags has not been set, all tag set pointers are
+ *		   different. If share_tags has been set, all tag_set pointers
+ *		   are identical.
  * @tag_list_lock: Serializes tag_list accesses.
  * @tag_list:	   List of the request queues that use this tag set. See also
  *		   request_queue.tag_set_list.
@@ -243,6 +246,7 @@ struct blk_mq_tag_set {
 	int			numa_node;
 	unsigned int		timeout;
 	unsigned int		flags;
+	bool			share_tags;
 	void			*driver_data;
 
 	struct blk_mq_tags	**tags;



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux