On 02/03/2018 05:21 AM, Ming Lei wrote:
> Quite a few HBAs (such as HPSA, megaraid, mpt3sas, ...) support multiple
> reply queues, but their tags are often HBA wide.
>
> These HBAs have switched to pci_alloc_irq_vectors(PCI_IRQ_AFFINITY)
> for automatic affinity assignment.
>
> Now that 84676c1f21e8ff5 ("genirq/affinity: assign vectors to all possible
> CPUs") has been merged into v4.16-rc, it is easy for some irq vectors to
> end up with only offline CPUs assigned, and this can't be avoided even
> though the allocation is improved.
>
> So all these drivers have to avoid asking the HBA to complete requests on
> a reply queue which has no online CPUs assigned, and HPSA has been broken
> with v4.15+:
>
> 	https://marc.info/?l=linux-kernel&m=151748144730409&w=2
>
> This issue can be solved generically and easily via blk_mq (scsi_mq)
> multiple hw queues by mapping each reply queue to an hctx, but one tricky
> thing is the HBA-wide (instead of hw-queue-wide) tags.
>
> This patch is based on the following patch from Hannes:
>
> 	https://marc.info/?l=linux-block&m=149132580511346&w=2
>
> One big difference from Hannes's patch is that this patch only makes the
> tags sbitmap and the active_queues data structure HBA wide; everything
> else keeps its NUMA locality, such as request, hctx, tags, ...
>
> The following patch will support global tags on null_blk, and performance
> data is provided there: no obvious performance loss is observed when the
> whole hw queue depth is the same.
>
> Cc: Hannes Reinecke <hare@xxxxxxx>
> Cc: Arun Easi <arun.easi@xxxxxxxxxx>
> Cc: Omar Sandoval <osandov@xxxxxx>,
> Cc: "Martin K. Petersen" <martin.petersen@xxxxxxxxxx>,
> Cc: James Bottomley <james.bottomley@xxxxxxxxxxxxxxxxxxxxx>,
> Cc: Christoph Hellwig <hch@xxxxxx>,
> Cc: Don Brace <don.brace@xxxxxxxxxxxxx>
> Cc: Kashyap Desai <kashyap.desai@xxxxxxxxxxxx>
> Cc: Peter Rivera <peter.rivera@xxxxxxxxxxxx>
> Cc: Laurence Oberman <loberman@xxxxxxxxxx>
> Cc: Mike Snitzer <snitzer@xxxxxxxxxx>
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>  block/blk-mq-debugfs.c |  1 +
>  block/blk-mq-sched.c   |  2 +-
>  block/blk-mq-tag.c     | 23 ++++++++++++++++++-----
>  block/blk-mq-tag.h     |  5 ++++-
>  block/blk-mq.c         | 29 ++++++++++++++++++++++++-----
>  block/blk-mq.h         |  3 ++-
>  include/linux/blk-mq.h |  2 ++
>  7 files changed, 52 insertions(+), 13 deletions(-)
>
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 0dfafa4b655a..0f0fafe03f5d 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -206,6 +206,7 @@ static const char *const hctx_flag_name[] = {
>  	HCTX_FLAG_NAME(SHOULD_MERGE),
>  	HCTX_FLAG_NAME(TAG_SHARED),
>  	HCTX_FLAG_NAME(SG_MERGE),
> +	HCTX_FLAG_NAME(GLOBAL_TAGS),
>  	HCTX_FLAG_NAME(BLOCKING),
>  	HCTX_FLAG_NAME(NO_SCHED),
>  };
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 55c0a745b427..191d4bc95f0e 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -495,7 +495,7 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
>  	int ret;
>
>  	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
> -					       set->reserved_tags);
> +					       set->reserved_tags, false);
>  	if (!hctx->sched_tags)
>  		return -ENOMEM;
>
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 571797dc36cb..66377d09eaeb 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -379,9 +379,11 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
>  	return NULL;
>  }
>
> -struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
> +struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *set,
> +				     unsigned int total_tags,
>  				     unsigned int reserved_tags,
> -				     int node, int alloc_policy)
> +				     int node, int alloc_policy,
> +				     bool global_tag)
>  {
>  	struct blk_mq_tags *tags;
>
> @@ -397,6 +399,14 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
>  	tags->nr_tags = total_tags;
>  	tags->nr_reserved_tags = reserved_tags;
>
> +	WARN_ON(global_tag && !set->global_tags);
> +	if (global_tag && set->global_tags) {
> +		tags->bitmap_tags = set->global_tags->bitmap_tags;
> +		tags->breserved_tags = set->global_tags->breserved_tags;
> +		tags->active_queues = set->global_tags->active_queues;
> +		tags->global_tags = true;
> +		return tags;
> +	}
>  	tags->bitmap_tags = &tags->__bitmap_tags;
>  	tags->breserved_tags = &tags->__breserved_tags;
>  	tags->active_queues = &tags->__active_queues;

Do we really need the 'global_tag' flag here?
Can't we just rely on the ->global_tags pointer to be set?
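Something like the below is what I have in mind (completely untested
sketch, and it glosses over the scheduler tags, which pass 'false' above
and would still need their own bitmaps): both the sharing decision and
the ownership check in the free path follow from the pointers, so neither
the extra bool parameter nor the per-tags 'global_tags' field seems
strictly necessary:

struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *set,
				     unsigned int total_tags,
				     unsigned int reserved_tags,
				     int node, int alloc_policy)
{
	struct blk_mq_tags *tags;

	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
	if (!tags)
		return NULL;

	tags->nr_tags = total_tags;
	tags->nr_reserved_tags = reserved_tags;

	if (set->global_tags) {
		/* share the HBA-wide sbitmaps and active_queues counter */
		tags->bitmap_tags = set->global_tags->bitmap_tags;
		tags->breserved_tags = set->global_tags->breserved_tags;
		tags->active_queues = set->global_tags->active_queues;
		return tags;
	}

	/* this instance owns its bitmaps */
	tags->bitmap_tags = &tags->__bitmap_tags;
	tags->breserved_tags = &tags->__breserved_tags;
	tags->active_queues = &tags->__active_queues;

	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
	/* only free the bitmaps if this instance actually owns them */
	if (tags->bitmap_tags == &tags->__bitmap_tags) {
		sbitmap_queue_free(tags->bitmap_tags);
		sbitmap_queue_free(tags->breserved_tags);
	}
	kfree(tags);
}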
> @@ -406,8 +416,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
>
>  void blk_mq_free_tags(struct blk_mq_tags *tags)
>  {
> -	sbitmap_queue_free(tags->bitmap_tags);
> -	sbitmap_queue_free(tags->breserved_tags);
> +	if (!tags->global_tags) {
> +		sbitmap_queue_free(tags->bitmap_tags);
> +		sbitmap_queue_free(tags->breserved_tags);
> +	}
>  	kfree(tags);
>  }
>
> @@ -441,7 +453,8 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
>  		if (tdepth > 16 * BLKDEV_MAX_RQ)
>  			return -EINVAL;
>
> -		new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0);
> +		new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0,
> +					  (*tagsptr)->global_tags);
>  		if (!new)
>  			return -ENOMEM;
>  		ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
> diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
> index a68323fa0c02..a87b5cfa5726 100644
> --- a/block/blk-mq-tag.h
> +++ b/block/blk-mq-tag.h
> @@ -20,13 +20,16 @@ struct blk_mq_tags {
>  	struct sbitmap_queue __bitmap_tags;
>  	struct sbitmap_queue __breserved_tags;
>
> +	bool global_tags;
>  	struct request **rqs;
>  	struct request **static_rqs;
>  	struct list_head page_list;
>  };
>
>
> -extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
> +extern struct blk_mq_tags *blk_mq_init_tags(struct blk_mq_tag_set *set,
> +		unsigned int nr_tags, unsigned int reserved_tags, int node,
> +		int alloc_policy, bool global_tag);
>  extern void blk_mq_free_tags(struct blk_mq_tags *tags);
>
>  extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 69d4534870af..a98466dc71b5 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2023,7 +2023,8 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags)
>  struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  					unsigned int hctx_idx,
>  					unsigned int nr_tags,
> -					unsigned int reserved_tags)
> +					unsigned int reserved_tags,
> +					bool global_tags)
>  {
>  	struct blk_mq_tags *tags;
>  	int node;
>
> @@ -2032,8 +2033,9 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  	if (node == NUMA_NO_NODE)
>  		node = set->numa_node;
>
> -	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
> -				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
> +	tags = blk_mq_init_tags(set, nr_tags, reserved_tags, node,
> +				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags),
> +				global_tags);
>  	if (!tags)
>  		return NULL;
>
> @@ -2336,7 +2338,8 @@ static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
>  	int ret = 0;
>
>  	set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
> -					set->queue_depth, set->reserved_tags);
> +					set->queue_depth, set->reserved_tags,
> +					!!(set->flags & BLK_MQ_F_GLOBAL_TAGS));
>  	if (!set->tags[hctx_idx])
>  		return false;
>
> @@ -2891,15 +2894,28 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
>  	if (ret)
>  		goto out_free_mq_map;
>
> +	if (set->flags & BLK_MQ_F_GLOBAL_TAGS) {
> +		ret = -ENOMEM;
> +		set->global_tags = blk_mq_init_tags(set, set->queue_depth,
> +				set->reserved_tags, set->numa_node,
> +				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags),
> +				false);
> +		if (!set->global_tags)
> +			goto out_free_mq_map;
> +	}
> +
>  	ret = blk_mq_alloc_rq_maps(set);
>  	if (ret)
> -		goto out_free_mq_map;
> +		goto out_free_global_tags;
>
>  	mutex_init(&set->tag_list_lock);
>  	INIT_LIST_HEAD(&set->tag_list);
>
>  	return 0;
>
> +out_free_global_tags:
> +	if (set->global_tags)
> +		blk_mq_free_tags(set->global_tags);
>  out_free_mq_map:
>  	kfree(set->mq_map);
>  	set->mq_map = NULL;
> @@ -2914,6 +2930,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
>  {
>  	int i;
>
> +	if (set->global_tags)
> +		blk_mq_free_tags(set->global_tags);
> +
>  	for (i = 0; i < nr_cpu_ids; i++)
>  		blk_mq_free_map_and_requests(set, i);
>
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 88c558f71819..d1d9a0a8e1fa 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -61,7 +61,8 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags);
>  struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  					unsigned int hctx_idx,
>  					unsigned int nr_tags,
> -					unsigned int reserved_tags);
> +					unsigned int reserved_tags,
> +					bool global_tags);
>  int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
>  		     unsigned int hctx_idx, unsigned int depth);
>
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 8efcf49796a3..8548c72d6b4a 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -82,6 +82,7 @@ struct blk_mq_tag_set {
>  	void			*driver_data;
>
>  	struct blk_mq_tags	**tags;
> +	struct blk_mq_tags	*global_tags;	/* for BLK_MQ_F_GLOBAL_TAGS */
>
>  	struct mutex		tag_list_lock;
>  	struct list_head	tag_list;
> @@ -175,6 +176,7 @@ enum {
>  	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
>  	BLK_MQ_F_TAG_SHARED	= 1 << 1,
>  	BLK_MQ_F_SG_MERGE	= 1 << 2,
> +	BLK_MQ_F_GLOBAL_TAGS	= 1 << 3,
>  	BLK_MQ_F_BLOCKING	= 1 << 5,
>  	BLK_MQ_F_NO_SCHED	= 1 << 6,
>  	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
>
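Just to confirm I understand the intended driver-side usage (rough,
untested sketch; the 'my_hba' names are made up and only
BLK_MQ_F_GLOBAL_TAGS is new with this patch): a blk-mq driver would
expose one hw queue per reply queue, keep its HBA-wide queue depth, and
simply set the new flag in its tag set:

static int my_hba_setup_tag_set(struct my_hba *hba)
{
	struct blk_mq_tag_set *set = &hba->tag_set;

	memset(set, 0, sizeof(*set));
	set->ops = &my_hba_mq_ops;			/* hypothetical mq ops */
	set->nr_hw_queues = hba->nr_reply_queues;	/* one hctx per reply queue */
	set->queue_depth = hba->can_queue;		/* HBA-wide tag space */
	set->numa_node = NUMA_NO_NODE;
	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_GLOBAL_TAGS;

	return blk_mq_alloc_tag_set(set);
}

and the block layer then shares the single tag space across all hctxs
while keeping requests and hctxs NUMA-local.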
Otherwise:

Reviewed-by: Hannes Reinecke <hare@xxxxxxxx>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@xxxxxxx			   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)