On Tue, Oct 12, 2021 at 11:15:25AM -0600, Jens Axboe wrote:
>
> +unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
> +			      unsigned int *offset)
> +{
> +	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
> +	struct sbitmap_queue *bt = &tags->bitmap_tags;
> +	unsigned long ret;
> +
> +	if (data->shallow_depth ||data->flags & BLK_MQ_REQ_RESERVED ||

Missing whitespace after the first ||.

> +	if (data->nr_tags > 1) {
> +		unsigned long tags;
> +		unsigned int tag_offset;
> +		int i, nr = 0;
> +
> +		tags = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
> +		if (unlikely(!tags)) {
> +			data->nr_tags = 1;
> +			goto retry;

Unless I'm missing something we don't want the retry case that maps the
contexts again and calls blk_mq_tag_busy again.

> +		}
> +		for (i = 0; tags; i++) {
> +			if (!(tags & (1UL << i)))
> +				continue;
> +			tag = tag_offset + i;
> +			tags &= ~(1UL << i);
> +			rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
> +			rq->rq_next = *data->cached_rq;
> +			*data->cached_rq = rq;
> +		}
> +		data->nr_tags -= nr;

And keeping all this batch logic in a helper (even if inlined) would
simplify the code a lot.  Something like this untested patch:

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5ac94149fb4be..608b270a7f6b8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -373,6 +373,38 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	return rq;
 }
 
+static inline struct request *
+__blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
+		u64 alloc_time_ns)
+{
+	unsigned int tag, tag_offset;
+	struct request *rq;
+	unsigned long tags;
+	int i, nr = 0;
+
+	tags = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
+	if (unlikely(!tags))
+		return NULL;
+
+	for (i = 0; tags; i++) {
+		if (!(tags & (1UL << i)))
+			continue;
+		tag = tag_offset + i;
+		tags &= ~(1UL << i);
+		rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+		rq->rq_next = *data->cached_rq;
+		*data->cached_rq = rq;
+	}
+	data->nr_tags -= nr;
+
+	if (!data->cached_rq)
+		return NULL;
+
+	rq = *data->cached_rq;
+	*data->cached_rq = rq->rq_next;
+	return rq;
+}
+
 static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 {
 	struct request_queue *q = data->q;
@@ -411,56 +443,32 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 	 * Try batched alloc if we want more than 1 tag.
 	 */
 	if (data->nr_tags > 1) {
-		unsigned long tags;
-		unsigned int tag_offset;
-		int i, nr = 0;
-
-		tags = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
-		if (unlikely(!tags)) {
-			data->nr_tags = 1;
-			goto retry;
-		}
-		for (i = 0; tags; i++) {
-			if (!(tags & (1UL << i)))
-				continue;
-			tag = tag_offset + i;
-			tags &= ~(1UL << i);
-			rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
-			rq->rq_next = *data->cached_rq;
-			*data->cached_rq = rq;
-		}
-		data->nr_tags -= nr;
-	} else {
-		/*
-		 * Waiting allocations only fail because of an inactive hctx.
-		 * In that case just retry the hctx assignment and tag
-		 * allocation as CPU hotplug should have migrated us to an
-		 * online CPU by now.
-		 */
-		tag = blk_mq_get_tag(data);
-		if (tag == BLK_MQ_NO_TAG) {
-			if (data->flags & BLK_MQ_REQ_NOWAIT)
-				return NULL;
-			/*
-			 * Give up the CPU and sleep for a random short time to
-			 * ensure that thread using a realtime scheduling class
-			 * are migrated off the CPU, and thus off the hctx that
-			 * is going away.
-			 */
-			msleep(3);
-			goto retry;
-		}
-
-		return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+		rq = __blk_mq_alloc_requests_batch(data, alloc_time_ns);
+		if (rq)
+			return rq;
+		data->nr_tags = 1;
 	}
 
-	if (data->cached_rq) {
-		rq = *data->cached_rq;
-		*data->cached_rq = rq->rq_next;
-		return rq;
+	/*
+	 * Waiting allocations only fail because of an inactive hctx.  In that
+	 * case just retry the hctx assignment and tag allocation as CPU hotplug
+	 * should have migrated us to an online CPU by now.
+	 */
+	tag = blk_mq_get_tag(data);
+	if (tag == BLK_MQ_NO_TAG) {
+		if (data->flags & BLK_MQ_REQ_NOWAIT)
+			return NULL;
+		/*
+		 * Give up the CPU and sleep for a random short time to
+		 * ensure that thread using a realtime scheduling class
+		 * are migrated off the CPU, and thus off the hctx that
+		 * is going away.
+		 */
+		msleep(3);
+		goto retry;
 	}
-	return NULL;
+	return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
 }
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
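
For reference, the bit walk in that helper is just the usual "iterate over
set bits" pattern: blk_mq_get_tags() hands back a word in which every set
bit is one allocated tag, plus the offset that bit 0 corresponds to.  A
minimal stand-alone sketch of that walk (illustrative only, nothing below is
blk-mq code and all names are made up):

#include <stdio.h>

static void walk_tag_mask(unsigned long mask, unsigned int tag_offset)
{
	unsigned int nr = 0;
	int i;

	for (i = 0; mask; i++) {
		if (!(mask & (1UL << i)))
			continue;
		mask &= ~(1UL << i);
		/* tag_offset + i is the tag number handed to request init */
		printf("allocated tag %u\n", tag_offset + i);
		nr++;
	}
	printf("%u tags consumed from this batch\n", nr);
}

int main(void)
{
	/* e.g. bits 0, 1 and 3 set, batch starting at tag 128 */
	walk_tag_mask(0x0bUL, 128);
	return 0;
}

This compiles and runs on its own; in the real code the loop body
initializes a request for each tag instead of printing it.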