On 06/05/2021 09:32, Ming Lei wrote:
>> +        if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
>> +                ret = blk_mq_init_sched_shared_sbitmap(q);
>> +                if (ret)
>> +                        goto err_free_tags;
>> +
>> +                queue_for_each_hw_ctx(q, hctx, i) {
>> +                        hctx->sched_tags->bitmap_tags =
>> +                                q->sched_bitmap_tags;
>> +                        hctx->sched_tags->breserved_tags =
>> +                                q->sched_breserved_tags;
>> +                }
>> +        }
>>
>>          ret = e->ops.init_sched(q, e);
>>          if (ret)
>> -                goto err;
>> +                goto err_free_sbitmap;
>>
>>          blk_mq_debugfs_register_sched(q);
>> @@ -584,6 +590,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>>                                  eq = q->elevator;
>>                                  blk_mq_sched_free_requests(q);
>>                                  blk_mq_exit_sched(q, eq);
>> +                                blk_mq_exit_sched_shared_sbitmap(q);
>
> blk_mq_exit_sched_shared_sbitmap() has been called in blk_mq_exit_sched()
> already.
ah, yes
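
So the extra call after blk_mq_exit_sched() can just be dropped, and the init_hctx failure path would reduce to something like this (untested, just to confirm I follow):

        if (ret) {
                eq = q->elevator;
                blk_mq_sched_free_requests(q);
                /* blk_mq_exit_sched() already calls blk_mq_exit_sched_shared_sbitmap() */
                blk_mq_exit_sched(q, eq);
                kobject_put(&eq->kobj);
                return ret;
        }
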
>>                                  kobject_put(&eq->kobj);
>>                                  return ret;
>>                          }
>> @@ -593,7 +600,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>>
>>          return 0;
>>
>> -err:
>> +err_free_sbitmap:
>> +        if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
>> +                blk_mq_exit_sched_shared_sbitmap(q);
>> +err_free_tags:
>>          blk_mq_sched_free_requests(q);
>>          blk_mq_sched_tags_teardown(q);
>>          q->elevator = NULL;
>> @@ -631,5 +641,7 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
>>          if (e->type->ops.exit_sched)
>>                  e->type->ops.exit_sched(e);
>>          blk_mq_sched_tags_teardown(q);
>> +        if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
>> +                blk_mq_exit_sched_shared_sbitmap(q);
>>          q->elevator = NULL;
>>  }
>> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
>> index 2a37731e8244..734fedceca7d 100644
>> --- a/block/blk-mq-tag.c
>> +++ b/block/blk-mq-tag.c
>> @@ -466,19 +466,40 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
>>          return -ENOMEM;
>>  }
>>
>> -int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
>> +static int __blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
>> +                                 struct sbitmap_queue *breserved_tags,
>> +                                 struct blk_mq_tag_set *set,
>> +                                 unsigned int queue_depth,
>> +                                 unsigned int reserved)
>>  {
>> -        unsigned int depth = set->queue_depth - set->reserved_tags;
>> +        unsigned int depth = queue_depth - reserved;
>>          int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
>>          bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
>> -        int i, node = set->numa_node;
>>
>> -        if (bt_alloc(&set->__bitmap_tags, depth, round_robin, node))
>> +        if (bt_alloc(bitmap_tags, depth, round_robin, set->numa_node))
>>                  return -ENOMEM;
>> -        if (bt_alloc(&set->__breserved_tags, set->reserved_tags,
>> -                     round_robin, node))
>> +        if (bt_alloc(breserved_tags, set->reserved_tags,
>> +                     round_robin, set->numa_node))
>>                  goto free_bitmap_tags;
>>
>> +        return 0;
>> +
>> +free_bitmap_tags:
>> +        sbitmap_queue_free(bitmap_tags);
>> +        return -ENOMEM;
>> +}
>> +
>> +int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set)
>
> IMO, this function should be named as blk_mq_init_shared_tags and moved to
> blk-mq-sched.c
But this is for regular tags. I assume you mean blk_mq_init_sched_shared_sbitmap(), below. If so, I can relocate it.

As for "sbitmap" vs "tags" in the name, I'm just being consistent between the preexisting blk_mq_init_shared_sbitmap() and blk_mq_sbitmap_shared(), and the new blk_mq_init_sched_shared_sbitmap().
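
For reference, the sched-side init ends up calling the same helper with the per-queue pair, along these lines (paraphrasing the later hunk rather than quoting it exactly):

        ret = __blk_mq_init_bitmaps(queue->sched_bitmap_tags,
                                    queue->sched_breserved_tags,
                                    set, MAX_SCHED_RQ, set->reserved_tags);

so both the set-wide and the sched shared sbitmaps are built the same way, hence keeping "sbitmap" in both names.
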
>> +{
>> +        int i, ret;
>> +
>> +        ret = __blk_mq_init_bitmaps(&set->__bitmap_tags,
>> +                                    &set->__breserved_tags,
>> +                                    set, set->queue_depth,
>> +                                    set->reserved_tags);
>> +        if (ret)
>> +                return ret;
>> +
>>          for (i = 0; i < set->nr_hw_queues; i++) {
>>                  struct blk_mq_tags *tags = set->tags[i];
>> @@ -487,9 +508,6 @@ int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
>>          }
>>
>>          return 0;
>> -free_bitmap_tags:
>> -        sbitmap_queue_free(&set->__bitmap_tags);
>> -        return -ENOMEM;
>>  }
>>
>>  void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
>> @@ -498,6 +516,52 @@ void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
>>          sbitmap_queue_free(&set->__breserved_tags);
>>  }
>>
>> +#define MAX_SCHED_RQ (16 * BLKDEV_MAX_RQ)
>> +
>> +int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
>> +{
>> +        struct blk_mq_tag_set *set = queue->tag_set;
>> +        int ret;
>> +
>> +        queue->sched_bitmap_tags =
>> +                kmalloc(sizeof(*queue->sched_bitmap_tags), GFP_KERNEL);
>> +        queue->sched_breserved_tags =
>> +                kmalloc(sizeof(*queue->sched_breserved_tags), GFP_KERNEL);
>> +        if (!queue->sched_bitmap_tags || !queue->sched_breserved_tags)
>> +                goto err;
>
> The two sbitmap queues can be embedded into 'request queue', so that we can
> avoid re-allocating them on every elevator switch.
ok
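
Something like this in blkdev.h, then - keep the existing field names but drop the pointer indirection (just a sketch of the suggestion, not tested):

        struct request_queue {
                ...
                /*
                 * Embedded rather than kmalloc()'ed, so an elevator switch
                 * only re-initialises them instead of reallocating them.
                 */
                struct sbitmap_queue    sched_bitmap_tags;
                struct sbitmap_queue    sched_breserved_tags;
                ...
        };

and then blk_mq_init_sched_shared_sbitmap() would lose the kmalloc()s and just (re)init the embedded queues.
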
> I will ask Yanhui to test the patch and see if it can make a difference.
Thanks