Re: [PATCH V3 3/3] blk-mq: fix dispatch from sw queue

On 12/17/18 5:55 AM, Jens Axboe wrote:
> On 12/17/18 5:08 AM, Ming Lei wrote:
>> When a request is added to the rq list of a sw queue (ctx), the rq may
>> belong to a different type of hctx, especially after multiple queue
>> mappings were introduced.
>>
>> So when dispatching requests from a sw queue via blk_mq_flush_busy_ctxs()
>> or blk_mq_dequeue_from_ctx(), a request belonging to another hctx queue
>> type can be dispatched to the current hctx when the read or poll queue
>> is enabled.
>>
>> This patch fixes the issue by introducing per-queue-type lists.
> 
> Looks good, just one comment:
> 
>> diff --git a/block/blk-mq.h b/block/blk-mq.h
>> index d1ed096723fb..0973a91eb1dd 100644
>> --- a/block/blk-mq.h
>> +++ b/block/blk-mq.h
>> @@ -12,14 +12,16 @@ struct blk_mq_ctxs {
>>  	struct blk_mq_ctx __percpu	*queue_ctx;
>>  };
>>  
>> +struct blk_mq_ctx_list {
>> +	spinlock_t		lock;
>> +	struct list_head	rq_list;
>> +}  ____cacheline_aligned_in_smp;
>> +
>>  /**
>>   * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
>>   */
>>  struct blk_mq_ctx {
>> -	struct {
>> -		spinlock_t		lock;
>> -		struct list_head	rq_list;
>> -	}  ____cacheline_aligned_in_smp;
>> +	struct blk_mq_ctx_list  list[HCTX_MAX_TYPES];
> 
> Let's not make that use 3 cachelines. There is no good reason to split
> these across cachelines; if we have heavy traffic to multiple of these,
> then we're not going very fast anyway. So just make it:
> 
> 	struct {
> 		spinlock_t		lock;
> 		struct list_head	rq_list[HCTX_MAX_TYPES];
> 	}  ____cacheline_aligned_in_smp;

I made that change just to check; it turns out fine and is of course a
smaller diff. Let me know.
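
To spell out the failure mode from the commit message, here is the flush
path before and after (an illustrative sketch only; the real hunks follow
below):

	/* before: all hctx types share one list per ctx, so flushing the
	 * busy ctxs of e.g. the default hctx can also pull requests that
	 * were queued for the poll hctx
	 */
	spin_lock(&ctx->lock);
	list_splice_tail_init(&ctx->rq_list, flush_data->list);
	spin_unlock(&ctx->lock);

	/* after: each hctx type only dispatches from its own per-type list */
	spin_lock(&ctx->lock);
	list_splice_tail_init(&ctx->rq_lists[hctx->type], flush_data->list);
	spin_unlock(&ctx->lock);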


diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 1e12033be9ea..90d68760af08 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -652,36 +652,43 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
 	return 0;
 }
 
-static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
-	__acquires(&ctx->lock)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	spin_lock(&ctx->lock);
-	return seq_list_start(&ctx->rq_list, *pos);
-}
-
-static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	return seq_list_next(v, &ctx->rq_list, pos);
-}
+#define CTX_RQ_SEQ_OPS(name, type)					\
+static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
+	__acquires(&ctx->lock)						\
+{									\
+	struct blk_mq_ctx *ctx = m->private;				\
+									\
+	spin_lock(&ctx->lock);						\
+	return seq_list_start(&ctx->rq_lists[type], *pos);		\
+}									\
+									\
+static void *ctx_##name##_rq_list_next(struct seq_file *m, void *v,	\
+				     loff_t *pos)			\
+{									\
+	struct blk_mq_ctx *ctx = m->private;				\
+									\
+	return seq_list_next(v, &ctx->rq_lists[type], pos);		\
+}									\
+									\
+static void ctx_##name##_rq_list_stop(struct seq_file *m, void *v)	\
+	__releases(&ctx->lock)						\
+{									\
+	struct blk_mq_ctx *ctx = m->private;				\
+									\
+	spin_unlock(&ctx->lock);					\
+}									\
+									\
+static const struct seq_operations ctx_##name##_rq_list_seq_ops = {	\
+	.start	= ctx_##name##_rq_list_start,				\
+	.next	= ctx_##name##_rq_list_next,				\
+	.stop	= ctx_##name##_rq_list_stop,				\
+	.show	= blk_mq_debugfs_rq_show,				\
+}
+
+CTX_RQ_SEQ_OPS(default, HCTX_TYPE_DEFAULT);
+CTX_RQ_SEQ_OPS(read, HCTX_TYPE_READ);
+CTX_RQ_SEQ_OPS(poll, HCTX_TYPE_POLL);
 
-static void ctx_rq_list_stop(struct seq_file *m, void *v)
-	__releases(&ctx->lock)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	spin_unlock(&ctx->lock);
-}
-
-static const struct seq_operations ctx_rq_list_seq_ops = {
-	.start	= ctx_rq_list_start,
-	.next	= ctx_rq_list_next,
-	.stop	= ctx_rq_list_stop,
-	.show	= blk_mq_debugfs_rq_show,
-};
 static int ctx_dispatched_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_ctx *ctx = data;
@@ -819,7 +826,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 };
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
-	{"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops},
+	{"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops},
+	{"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops},
+	{"poll_rq_list", 0400, .seq_ops = &ctx_poll_rq_list_seq_ops},
 	{"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write},
 	{"merged", 0600, ctx_merged_show, ctx_merged_write},
 	{"completed", 0600, ctx_completed_show, ctx_completed_write},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1efd781fcdea..5af40bbf4fc6 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -301,11 +301,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
  * too much time checking for merges.
  */
 static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_hw_ctx *hctx,
 				 struct blk_mq_ctx *ctx, struct bio *bio)
 {
+	enum hctx_type type = hctx->type;
+
 	lockdep_assert_held(&ctx->lock);
 
-	if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+	if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) {
 		ctx->rq_merged++;
 		return true;
 	}
@@ -319,17 +322,19 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
 	bool ret = false;
+	enum hctx_type type;
 
 	if (e && e->type->ops.bio_merge) {
 		blk_mq_put_ctx(ctx);
 		return e->type->ops.bio_merge(hctx, bio);
 	}
 
+	type = hctx->type;
 	if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
-			!list_empty_careful(&ctx->rq_list)) {
+			!list_empty_careful(&ctx->rq_lists[type])) {
 		/* default per sw-queue merge */
 		spin_lock(&ctx->lock);
-		ret = blk_mq_attempt_merge(q, ctx, bio);
+		ret = blk_mq_attempt_merge(q, hctx, ctx, bio);
 		spin_unlock(&ctx->lock);
 	}
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 313f28b2d079..1546e88fe59c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -958,9 +958,10 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
 	struct flush_busy_ctx_data *flush_data = data;
 	struct blk_mq_hw_ctx *hctx = flush_data->hctx;
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+	enum hctx_type type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	list_splice_tail_init(&ctx->rq_list, flush_data->list);
+	list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
 	sbitmap_clear_bit(sb, bitnr);
 	spin_unlock(&ctx->lock);
 	return true;
@@ -992,12 +993,13 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
 	struct dispatch_rq_data *dispatch_data = data;
 	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+	enum hctx_type type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_list)) {
-		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+	if (!list_empty(&ctx->rq_lists[type])) {
+		dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
 		list_del_init(&dispatch_data->rq->queuelist);
-		if (list_empty(&ctx->rq_list))
+		if (list_empty(&ctx->rq_lists[type]))
 			sbitmap_clear_bit(sb, bitnr);
 	}
 	spin_unlock(&ctx->lock);
@@ -1608,15 +1610,16 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 					    bool at_head)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	enum hctx_type type = hctx->type;
 
 	lockdep_assert_held(&ctx->lock);
 
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
-		list_add(&rq->queuelist, &ctx->rq_list);
+		list_add(&rq->queuelist, &ctx->rq_lists[type]);
 	else
-		list_add_tail(&rq->queuelist, &ctx->rq_list);
+		list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
 }
 
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -1651,6 +1654,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 
 {
 	struct request *rq;
+	enum hctx_type type = hctx->type;
 
 	/*
 	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
@@ -1662,7 +1666,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	}
 
 	spin_lock(&ctx->lock);
-	list_splice_tail_init(list, &ctx->rq_list);
+	list_splice_tail_init(list, &ctx->rq_lists[type]);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
 }
@@ -2200,13 +2204,15 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
+	enum hctx_type type;
 
 	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
 	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+	type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_list)) {
-		list_splice_init(&ctx->rq_list, &tmp);
+	if (!list_empty(&ctx->rq_lists[type])) {
+		list_splice_init(&ctx->rq_lists[type], &tmp);
 		blk_mq_hctx_clear_pending(hctx, ctx);
 	}
 	spin_unlock(&ctx->lock);
@@ -2343,10 +2349,14 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 	for_each_possible_cpu(i) {
 		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
 		struct blk_mq_hw_ctx *hctx;
+		int k;
 
 		__ctx->cpu = i;
+
 		spin_lock_init(&__ctx->lock);
-		INIT_LIST_HEAD(&__ctx->rq_list);
+		for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++) {
+			INIT_LIST_HEAD(&__ctx->rq_lists[k]);
+		}
 		__ctx->queue = q;
 
 		/*
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d1ed096723fb..d943d46b0785 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -18,8 +18,8 @@ struct blk_mq_ctxs {
 struct blk_mq_ctx {
 	struct {
 		spinlock_t		lock;
-		struct list_head	rq_list;
-	}  ____cacheline_aligned_in_smp;
+		struct list_head	rq_lists[HCTX_MAX_TYPES];
+	} ____cacheline_aligned_in_smp;
 
 	unsigned int		cpu;
 	unsigned short		index_hw[HCTX_MAX_TYPES];

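A side effect of the debugfs change is that the single per-ctx "rq_list"
file is split into one file per queue type; assuming debugfs is mounted at
/sys/kernel/debug, they should show up as e.g.:

	/sys/kernel/debug/block/<disk>/hctx0/cpu0/default_rq_list
	/sys/kernel/debug/block/<disk>/hctx0/cpu0/read_rq_list
	/sys/kernel/debug/block/<disk>/hctx0/cpu0/poll_rq_list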
-- 
Jens Axboe



