On Wed, Jul 14, 2021 at 11:06:26PM +0800, John Garry wrote: > Currently a full set of static requests are allocated per hw queue per > tagset when shared sbitmap is used. > > However, only tagset->queue_depth number of requests may be active at > any given time. As such, only tagset->queue_depth number of static > requests are required. > > The same goes for using an IO scheduler, which allocates a full set of > static requests per hw queue per request queue. > > This series very significantly reduces memory usage in both scenarios by > allocating static rqs per tagset and per request queue, respectively, > rather than per hw queue per tagset and per request queue. > > For megaraid sas driver on my 128-CPU arm64 system with 1x SATA disk, we > save approx. 300MB(!) [370MB -> 60MB] > > A couple of patches are marked as RFC, as maybe there is a better > implementation approach. There is another candidate for addressing this issue, and it looks simpler: block/blk-mq-sched.c | 4 ++++ block/blk-mq-tag.c | 4 ++++ block/blk-mq-tag.h | 3 +++ block/blk-mq.c | 18 ++++++++++++++++++ block/blk-mq.h | 11 +++++++++++ 5 files changed, 40 insertions(+) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c838d81ac058..b9236ee0fe4e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -538,6 +538,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, if (!hctx->sched_tags) return -ENOMEM; + blk_mq_set_master_tags(hctx->sched_tags, + q->queue_hw_ctx[0]->sched_tags, hctx->flags, + hctx_idx); + ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); if (ret) blk_mq_sched_free_tags(set, hctx, hctx_idx); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 86f87346232a..c471a073234d 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -608,6 +608,10 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, tags->nr_reserved_tags, set->flags); if (!new) return -ENOMEM; + + blk_mq_set_master_tags(new, + 
hctx->queue->queue_hw_ctx[0]->sched_tags, set->flags, + hctx->queue_num); ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth); if (ret) { blk_mq_free_rq_map(new, set->flags); diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 8ed55af08427..0a3fbbc61e06 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -21,6 +21,9 @@ struct blk_mq_tags { struct request **static_rqs; struct list_head page_list; + /* only used for blk_mq_is_sbitmap_shared() */ + struct blk_mq_tags *master; + /* * used to clear request reference in rqs[] before freeing one * request pool diff --git a/block/blk-mq.c b/block/blk-mq.c index 2c4ac51e54eb..ef8a6a7e5f7c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2348,6 +2348,15 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, { struct page *page; + if (blk_mq_is_sbitmap_shared(set->flags)) { + if (tags->master) + tags = tags->master; + if (hctx_idx < set->nr_hw_queues - 1) { + blk_mq_clear_rq_mapping(set, tags, hctx_idx); + return; + } + } + if (tags->rqs && set->ops->exit_request) { int i; @@ -2444,6 +2453,12 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, size_t rq_size, left; int node; + if (blk_mq_is_sbitmap_shared(set->flags) && tags->master) { + memcpy(tags->static_rqs, tags->master->static_rqs, + sizeof(tags->static_rqs[0]) * tags->nr_tags); + return 0; + } + node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx); if (node == NUMA_NO_NODE) node = set->numa_node; @@ -2860,6 +2875,9 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, if (!set->tags[hctx_idx]) return false; + blk_mq_set_master_tags(set->tags[hctx_idx], set->tags[0], flags, + hctx_idx); + ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx, set->queue_depth); if (!ret) diff --git a/block/blk-mq.h b/block/blk-mq.h index d08779f77a26..a08b89be6acc 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -354,5 +354,16 @@ static inline bool hctx_may_queue(struct 
blk_mq_hw_ctx *hctx, return __blk_mq_active_requests(hctx) < depth; } +static inline void blk_mq_set_master_tags(struct blk_mq_tags *tags, + struct blk_mq_tags *master_tags, unsigned int flags, + unsigned int hctx_idx) +{ + if (blk_mq_is_sbitmap_shared(flags)) { + if (hctx_idx) + tags->master = master_tags; + else + tags->master = NULL; + } +} #endif Thanks, Ming