On Thu, Nov 21, 2019 at 10:24:16AM +0000, John Garry wrote: > > > int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) > > > { > > > + struct blk_mq_tag_set *tag_set = q->tag_set; > > > struct blk_mq_hw_ctx *hctx; > > > struct elevator_queue *eq; > > > unsigned int i; > > > @@ -537,6 +538,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) > > > blk_mq_debugfs_register_sched_hctx(q, hctx); > > > } > > > + if (blk_mq_is_sbitmap_shared(tag_set)) { > > > + if (!blk_mq_init_sched_shared_sbitmap(tag_set, q->nr_requests)) { > > > + ret = -ENOMEM; > > > + goto err; > > > + } > > > + queue_for_each_hw_ctx(q, hctx, i) { > > > + struct blk_mq_tags *tags = hctx->sched_tags; > > > + > > > + tags->pbitmap_tags = &tag_set->sched_shared_bitmap_tags; > > > + tags->pbreserved_tags = &tag_set->sched_shared_breserved_tags; > > > > This kind of sharing is wrong, sched tags should be request queue wide > > instead of tagset wide, and each request queue has its own & independent > > scheduler queue. > > Right, so if we get a scheduler tag we still need to get a driver tag, > and this would be the "shared" tag. > > That makes things simpler then. > > > > > > + } > > > + } > > > + > > > return 0; > > > err: > > > diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c > > > index 42792942b428..6625bebb46c3 100644 > > > --- a/block/blk-mq-tag.c > > > +++ b/block/blk-mq-tag.c > > > @@ -35,9 +35,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) > > > */ > > > void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) > > > { > > > - sbitmap_queue_wake_all(&tags->bitmap_tags); > > > + sbitmap_queue_wake_all(tags->pbitmap_tags); > > > if (include_reserve) > > > - sbitmap_queue_wake_all(&tags->breserved_tags); > > > + sbitmap_queue_wake_all(tags->pbreserved_tags); > > > } > > [...] 
> > > > > mutex_init(&set->tag_list_lock); > > > INIT_LIST_HEAD(&set->tag_list); > > > @@ -3137,6 +3151,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) > > > { > > > struct blk_mq_tag_set *set = q->tag_set; > > > struct blk_mq_hw_ctx *hctx; > > > + bool sched_tags = false; > > > int i, ret; > > > if (!set) > > > @@ -3160,6 +3175,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) > > > ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, > > > false); > > > } else { > > > + sched_tags = true; > > > ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, > > > nr, true); > > > } > > > @@ -3169,8 +3185,41 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) > > > q->elevator->type->ops.depth_updated(hctx); > > > } > > > - if (!ret) > > > + /* > > > + * if ret is 0, all queues should have been updated to the same depth > > > + * if not, then maybe some have been updated - yuk, need to handle this for shared sbitmap... > > > + * if some are updated, we should probably roll back the change altogether. FIXME > > > + */ > > > + if (!ret) { > > > + if (blk_mq_is_sbitmap_shared(set)) { > > > + if (sched_tags) { > > > + sbitmap_queue_free(&set->sched_shared_bitmap_tags); > > > + sbitmap_queue_free(&set->sched_shared_breserved_tags); > > > + if (!blk_mq_init_sched_shared_sbitmap(set, nr)) > > > + return -ENOMEM; /* fixup error handling */ > > > + > > > + queue_for_each_hw_ctx(q, hctx, i) { > > > + hctx->sched_tags->pbitmap_tags = &set->sched_shared_bitmap_tags; > > > + hctx->sched_tags->pbreserved_tags = &set->sched_shared_breserved_tags; > > > + } > > > + } else { > > > + sbitmap_queue_free(&set->shared_bitmap_tags); > > > + sbitmap_queue_free(&set->shared_breserved_tags); > > > + if (!blk_mq_init_shared_sbitmap(set)) > > > + return -ENOMEM; /* fixup error handling */ > > > > No, we can't re-allocate driver tags here which are shared by all LUNs. 
> And you should see that 'can_grow' is set as false for driver tags > > in blk_mq_update_nr_requests(), which can only touch per-request-queue > > data, not tagset wide data. > > Yeah, I see that. We should just resize the driver tags bitmap. > > Personally I think the mainline code is a little loose here, as if we could > grow driver tags, then blk_mq_tagset.tags would be out-of-sync with the > hctx->tags. Maybe that should be made more explicit in the code. > > BTW, do you have anything to say about this (modified slightly) comment: > > /* > * if ret != 0, q->nr_requests would not be updated, yet the depth > * for some hctx sched tags may have changed - is that the right thing > * to do? > */ In theory, your concern is right, but so far we only support the same depth for every hctx, for either sched tags or driver tags, so it is not an issue so far. Thanks, Ming